From 9e114e4d78f753c44b7dfa37327377449fd5d585 Mon Sep 17 00:00:00 2001 From: Guanheng Zhang Date: Mon, 8 Mar 2021 14:05:19 -0800 Subject: [PATCH 1/3] edit json file --- test/asset/raw_datasets.json | 100 ++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/test/asset/raw_datasets.json b/test/asset/raw_datasets.json index e48add06d2..20af644e3b 100644 --- a/test/asset/raw_datasets.json +++ b/test/asset/raw_datasets.json @@ -1,49 +1,51 @@ -{"dataset_name": "IMDB", "split": "train", "NUM_LINES": 25000, "MD5": "7c2ac02c03563afcf9b574c7e56c153a", "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", "first_line": ["neg", "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.

The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.

What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.

I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot."]} -{"dataset_name": "IMDB", "split": "test", "NUM_LINES": 25000, "MD5": "7c2ac02c03563afcf9b574c7e56c153a", "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", "first_line": ["neg", "I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. It's not. It's clich\u00e9d and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It's really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it's rubbish as they have to always say \"Gene Roddenberry's Earth...\" otherwise people would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again."]} -{"dataset_name": "AG_NEWS", "split": "train", "NUM_LINES": 120000, "MD5": "b1a00f826fdfbd249f79597b59e1dc12", "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv", "first_line": [3, "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green again."]} -{"dataset_name": "AG_NEWS", "split": "test", "NUM_LINES": 7600, "MD5": "d52ea96a97a2d943681189a97654912d", "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv", "first_line": [3, "Fears for T N pension after talks Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul."]} -{"dataset_name": "SogouNews", "split": "train", "NUM_LINES": 450000, "MD5": "0c1700ba70b73f964dd8de569d3fd03e", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", "first_line": [4, "2008 di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n me3i nv3 mo2 te4 2008di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n yu2 15 ri4 za4i qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n she4ng da4 ka1i mu4 . be3n ci4 che1 zha3n jia1ng chi2 xu4 da4o be3n yue4 19 ri4 . ji1n nia2n qi1ng da3o guo2 ji4 che1 zha3n shi4 li4 nia2n da3o che2ng che1 zha3n gui1 mo2 zui4 da4 di2 yi1 ci4 , shi3 yo4ng lia3o qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n di2 qua2n bu4 shi4 ne4i wa4i zha3n gua3n . yi3 xia4 we2i xia4n cha3ng mo2 te4 tu2 pia4n ."]} -{"dataset_name": "SogouNews", "split": "test", "NUM_LINES": 60000, "MD5": "0c1700ba70b73f964dd8de569d3fd03e", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", "first_line": [1, " ti3 ca1o shi4 jie4 be1i : che2ng fe1i na2 pi2ng he2ng mu4 zi4 yo2u ca1o ji1n pa2i su4 du4 : ( shuo1 mi2ng : dia3n ji1 zi4 do4ng bo1 fa4ng )\\n shuo1 mi2ng : dia3n ji1 ga1i a4n niu3 , xua3n ze2 yi1 lu4n ta2n ji2 ke3 "]} -{"dataset_name": "DBpedia", "split": "train", "NUM_LINES": 560000, "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", "first_line": [1, "E. D. Abbott Ltd Abbott of Farnham E D Abbott Limited was a British coachbuilding business based in Farnham Surrey trading under that name from 1929. A major part of their output was under sub-contract to motor vehicle manufacturers. Their business closed in 1972."]} -{"dataset_name": "DBpedia", "split": "test", "NUM_LINES": 70000, "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", "first_line": [1, "TY KU TY KU /ta\u026aku\u02d0/ is an American alcoholic beverage company that specializes in sake and other spirits. The privately-held company was founded in 2004 and is headquartered in New York City New York. While based in New York TY KU's beverages are made in Japan through a joint venture with two sake breweries. Since 2011 TY KU's growth has extended its products into all 50 states."]} -{"dataset_name": "YelpReviewPolarity", "split": "train", "NUM_LINES": 560000, "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", "first_line": [1, "Unfortunately, the frustration of being Dr. Goldberg's patient is a repeat of the experience I've had with so many other doctors in NYC -- good doctor, terrible staff. It seems that his staff simply never answers the phone. It usually takes 2 hours of repeated calling to get an answer. Who has time for that or wants to deal with it? I have run into this problem with many other doctors and I just don't get it. You have office workers, you have patients with medical needs, why isn't anyone answering the phone? It's incomprehensible and not work the aggravation. It's with regret that I feel that I have to give Dr. Goldberg 2 stars."]} -{"dataset_name": "YelpReviewPolarity", "split": "test", "NUM_LINES": 38000, "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", "first_line": [2, "Contrary to other reviews, I have zero complaints about the service or the prices. I have been getting tire service here for the past 5 years now, and compared to my experience with places like Pep Boys, these guys are experienced and know what they're doing. \\nAlso, this is one place that I do not feel like I am being taken advantage of, just because of my gender. Other auto mechanics have been notorious for capitalizing on my ignorance of cars, and have sucked my bank account dry. But here, my service and road coverage has all been well explained - and let up to me to decide. \\nAnd they just renovated the waiting room. It looks a lot better than it did in previous years."]} -{"dataset_name": "YelpReviewFull", "split": "train", "NUM_LINES": 650000, "MD5": "f7ddfafed1033f68ec72b9267863af6c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", "first_line": [5, "dr. goldberg offers everything i look for in a general practitioner. he's nice and easy to talk to without being patronizing; he's always on time in seeing his patients; he's affiliated with a top-notch hospital (nyu) which my parents have explained to me is very important in case something happens and you need surgery; and you can get referrals to see specialists without having to see him first. really, what more do you need? i'm sitting here trying to think of any complaints i have about him, but i'm really drawing a blank."]} -{"dataset_name": "YelpReviewFull", "split": "test", "NUM_LINES": 50000, "MD5": "f7ddfafed1033f68ec72b9267863af6c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", "first_line": [1, "I got 'new' tires from them and within two weeks got a flat. I took my car to a local mechanic to see if i could get the hole patched, but they said the reason I had a flat was because the previous patch had blown - WAIT, WHAT? I just got the tire and never needed to have it patched? This was supposed to be a new tire. \\nI took the tire over to Flynn's and they told me that someone punctured my tire, then tried to patch it. So there are resentful tire slashers? I find that very unlikely. After arguing with the guy and telling him that his logic was far fetched he said he'd give me a new tire \\\"this time\\\". \\nI will never go back to Flynn's b/c of the way this guy treated me and the simple fact that they gave me a used tire!"]} -{"dataset_name": "YahooAnswers", "split": "train", "NUM_LINES": 1400000, "MD5": "f3f9899b997a42beb24157e62e3eea8d", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", "first_line": [5, "why doesn't an optical mouse work on a glass table? or even on some surfaces? Optical mice use an LED and a camera to rapidly capture images of the surface beneath the mouse. The infomation from the camera is analyzed by a DSP (Digital Signal Processor) and used to detect imperfections in the underlying surface and determine motion. Some materials, such as glass, mirrors or other very shiny, uniform surfaces interfere with the ability of the DSP to accurately analyze the surface beneath the mouse. \\nSince glass is transparent and very uniform, the mouse is unable to pick up enough imperfections in the underlying surface to determine motion. Mirrored surfaces are also a problem, since they constantly reflect back the same image, causing the DSP not to recognize motion properly. When the system is unable to see surface changes associated with movement, the mouse will not work properly."]} -{"dataset_name": "YahooAnswers", "split": "test", "NUM_LINES": 60000, "MD5": "f3f9899b997a42beb24157e62e3eea8d", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", "first_line": [9, "What makes friendship click? How does the spark keep going? good communication is what does it. Can you move beyond small talk and say what's really on your mind. If you start doing this, my expereince is that potentially good friends will respond or shun you. Then you know who the really good friends are."]} -{"dataset_name": "AmazonReviewPolarity", "split": "train", "NUM_LINES": 3600000, "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", "first_line": [2, "Stuning even for the non-gamer This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen! ^_^"]} -{"dataset_name": "AmazonReviewPolarity", "split": "test", "NUM_LINES": 400000, "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", "first_line": [2, "Great CD My lovely Pat has one of the GREAT voices of her generation. I have listened to this CD for YEARS and I still LOVE IT. When I'm in a good mood it makes me feel better. A bad mood just evaporates like sugar in the rain. This CD just oozes LIFE. Vocals are jusat STUUNNING and lyrics just kill. One of life's hidden gems. This is a desert isle CD in my book. Why she never made it big is just beyond me. Everytime I play this, no matter black, white, young, old, male, female EVERYBODY says one thing \"Who was that singing ?\""]} -{"dataset_name": "AmazonReviewFull", "split": "train", "NUM_LINES": 3000000, "MD5": "57d28bd5d930e772930baddf36641c7c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", "first_line": [3, "more like funchuck Gave this to my dad for a gag gift after directing \"Nunsense,\" he got a reall kick out of it!"]} -{"dataset_name": "AmazonReviewFull", "split": "test", "NUM_LINES": 650000, "MD5": "57d28bd5d930e772930baddf36641c7c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", "first_line": [1, "mens ultrasheer This model may be ok for sedentary types, but I'm active and get around alot in my job - consistently found these stockings rolled up down by my ankles! Not Good!! Solution: go with the standard compression stocking, 20-30, stock #114622. Excellent support, stays up and gives me what I need. Both pair of these also tore as I struggled to pull them up all the time. Good riddance/bad investment!"]} -{"dataset_name": "UDPOS", "split": "train", "NUM_LINES": 12543, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["Al", "-", "Zaman", ":", "American", "forces", "killed", "Shaikh", "Abdullah", "al", "-", "Ani", ",", "the", "preacher", "at", "the", "mosque", "in", "the", "town", "of", "Qaim", ",", "near", "the", "Syrian", "border", "."], ["PROPN", "PUNCT", "PROPN", "PUNCT", "ADJ", "NOUN", "VERB", "PROPN", "PROPN", "PROPN", "PUNCT", "PROPN", "PUNCT", "DET", "NOUN", "ADP", "DET", "NOUN", "ADP", "DET", "NOUN", "ADP", "PROPN", "PUNCT", "ADP", "DET", "ADJ", "NOUN", "PUNCT"], ["NNP", "HYPH", "NNP", ":", "JJ", "NNS", "VBD", "NNP", "NNP", "NNP", "HYPH", "NNP", ",", "DT", "NN", "IN", "DT", "NN", "IN", "DT", "NN", "IN", "NNP", ",", "IN", "DT", "JJ", "NN", "."]]} -{"dataset_name": "UDPOS", "split": "valid", "NUM_LINES": 2002, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["From", "the", "AP", "comes", "this", "story", ":"], ["ADP", "DET", "PROPN", "VERB", "DET", "NOUN", "PUNCT"], ["IN", "DT", "NNP", "VBZ", "DT", "NN", ":"]]} -{"dataset_name": "UDPOS", "split": "test", "NUM_LINES": 2077, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["What", "if", "Google", "Morphed", "Into", "GoogleOS", "?"], ["PRON", "SCONJ", "PROPN", "VERB", "ADP", "PROPN", "PUNCT"], ["WP", "IN", "NNP", "VBD", "IN", "NNP", "."]]} -{"dataset_name": "CoNLL2000Chunking", "split": "train", "NUM_LINES": 8936, "MD5": {"train": "6969c2903a1f19a83569db643e43dcc8", "test": "a916e1c2d83eb3004b38fc6fcd628939"}, "URL": {"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz"}, "first_line": [["Confidence", "in", "the", "pound", "is", "widely", "expected", "to", "take", "another", "sharp", "dive", "if", "trade", "figures", "for", "September", ",", "due", "for", "release", "tomorrow", ",", "fail", "to", "show", "a", "substantial", "improvement", "from", "July", "and", "August", "'s", "near-record", "deficits", "."], ["NN", "IN", "DT", "NN", "VBZ", "RB", "VBN", "TO", "VB", "DT", "JJ", "NN", "IN", "NN", "NNS", "IN", "NNP", ",", "JJ", "IN", "NN", "NN", ",", "VB", "TO", "VB", "DT", "JJ", "NN", "IN", "NNP", "CC", "NNP", "POS", "JJ", "NNS", "."], ["B-NP", "B-PP", "B-NP", "I-NP", "B-VP", "I-VP", "I-VP", "I-VP", "I-VP", "B-NP", "I-NP", "I-NP", "B-SBAR", "B-NP", "I-NP", "B-PP", "B-NP", "O", "B-ADJP", "B-PP", "B-NP", "B-NP", "O", "B-VP", "I-VP", "I-VP", "B-NP", "I-NP", "I-NP", "B-PP", "B-NP", "I-NP", "I-NP", "B-NP", "I-NP", "I-NP", "O"]]} -{"dataset_name": "CoNLL2000Chunking", "split": "test", "NUM_LINES": 2012, "MD5": {"train": "6969c2903a1f19a83569db643e43dcc8", "test": "a916e1c2d83eb3004b38fc6fcd628939"}, "URL": {"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz"}, "first_line": [["Rockwell", "International", "Corp.", "'s", "Tulsa", "unit", "said", "it", "signed", "a", "tentative", "agreement", "extending", "its", "contract", "with", "Boeing", "Co.", "to", "provide", "structural", "parts", "for", "Boeing", "'s", "747", "jetliners", "."], ["NNP", "NNP", "NNP", "POS", "NNP", "NN", "VBD", "PRP", "VBD", "DT", "JJ", "NN", "VBG", "PRP$", "NN", "IN", "NNP", "NNP", "TO", "VB", "JJ", "NNS", "IN", "NNP", "POS", "CD", "NNS", "."], ["B-NP", "I-NP", "I-NP", "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "B-PP", "B-NP", "I-NP", "B-VP", "I-VP", "B-NP", "I-NP", "B-PP", "B-NP", "B-NP", "I-NP", "I-NP", "O"]]} -{"dataset_name": "Multi30k", "split": "train", "NUM_LINES": 29000, "MD5": ["d9a5fc268917725a2b0efce3a0cc8607", "81ff90b99829c0cd4b1b587d394afd39", "0065d13af80720a55ca8153d126e6627", "6cb767741dcad3931f966fefbc05203f", "62f36422bfab90fb42a560546b704009", "540da4566bb6dd35fdbc720218b742b7", "613eb4a3f0c2b13f0871ced946851b0e", "d848fe0ae8b9447209fb49c5c31cb3d2", "abc13b4042f4fef1cdff6de3b6c53b71", "cbf5bfc2147706f228d288e1b18bf4af", "bdfe4222f4692ccaa1e3389460f0890e", "0e1ee2b4145795bd180b193424db204b", "1cff688d1aadef7fdb22e9ad27d6fd2c", "3e10289959d0059952511c31df3c7550"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.en.gz"], "first_line": ["Zwei junge wei\u00dfe M\u00e4nner sind im Freien in der N\u00e4he vieler B\u00fcsche.\n", "Two young, White males are outside near many bushes.\n"]} -{"dataset_name": "Multi30k", "split": "valid", "NUM_LINES": 1014, "MD5": ["83cdc082f646b769095615384cf5c0ca", "6e0e229eb049e3fc99a1ef02fb2d5f91", "2b69aa9253948ac9f67e94917272dd40", "93fc564584b7e5ba410c761ea5a1c682", "b26486ede1d4436d5acf6e38c65bb44d", "16165248083beacebfe18866d5f4f0ae", "7180780822d4b600eb81c1ccf171c230", "8edb43c90cae66ec762748a968089b99", "873a377a348713d3ab84db1fb57cdede", "df57faf5f00d434d2559c021ef55f1aa", "9077a5127480cc799116384de501bd70", "c1f697c3b6dfb7305349db34e26b45fc", "acb5ea26a577ceccfae6337181c31716", "680816e0938fea5cf5331444bc09a4cf"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.en.gz"], "first_line": ["Eine Gruppe von M\u00e4nnern l\u00e4dt Baumwolle auf einen Lastwagen\n", "A group of men are loading cotton onto a truck\n"]} -{"dataset_name": "Multi30k", "split": "test", "NUM_LINES": 1000, "MD5": ["3104872229daa1bef3b401d44dd2220b", "efd67d314d98489b716b145475101932", "6a8d5c87f6ae19e3d35681aa6fd16571", "e8cd6ec2bc8a11fc846fa48a46e3d0bb", "ff2c0fcb4893a13bd73414306bc250ae", "005396bac545d880abe6f00bbb7dbbb4", "a7b684e0edbef1d4a23660c8e8e743fd", "a152878809942757a55ce087073486b8", "08dc7cd4a662f31718412de95ca9bfe3", "cb09af7d2b501f9112f2d6a59fa1360d", "4995d10954a804d3cdfd907b9fd093e8", "ac0c72653c140dd96707212a1baa4278", "6dfb42cae4e4fd9a3c40e62ff5398a55", "ece8cec6b87bf00dd12607f3062dae4c", "9a7e7b2dcc33135a32cd621c3b37d2d8", "7d5ef0f069ee2d74dc2fdc6b46cd47fa", "eec05227daba4bb8f3f8f25b1cb335f4", "9318fa08c0c0b96114eadb10eb2fc633", "088ec0765fa213a0eb937a62adfd4996", "5f7c8d0be0ac739856b47d32a9434998", "713ed720636622a54546d5f14f88b00f"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2018_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.en.gz"], "first_line": ["Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.\n", "A man in an orange hat starring at something.\n"]} -{"dataset_name": "IWSLT2016", "split": "train", "NUM_LINES": 196884, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["David Gallo: Das ist Bill Lange. Ich bin Dave Gallo.\n", "David Gallo: This is Bill Lange. I'm Dave Gallo.\n"]} -{"dataset_name": "IWSLT2016", "split": "valid", "NUM_LINES": 993, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["Als ich 11 Jahre alt war, wurde ich eines Morgens von den Kl\u00e4ngen heller Freude geweckt.\n", "When I was 11, I remember waking up one morning to the sound of joy in my house.\n"]} -{"dataset_name": "IWSLT2016", "split": "test", "NUM_LINES": 1305, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["Als ich in meinen 20ern war, hatte ich meine erste Psychotherapie-Patientin.\n", "When I was in my 20s, I saw my very first psychotherapy client.\n"]} -{"dataset_name": "IWSLT2017", "split": "train", "NUM_LINES": 206112, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Vielen Dank, Chris.\n", "Thank you so much, Chris.\n"]} -{"dataset_name": "IWSLT2017", "split": "valid", "NUM_LINES": 888, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Letztes Jahr habe ich diese beiden Folien gezeigt, um zu veranschaulichen, dass die arktische Eiskappe, die f\u00fcr ann\u00e4hernd drei Millionen Jahre die Gr\u00f6sse der unteren 48 Staaten hatte, um 40 Prozent geschrumpft ist.\n","Last year I showed these two slides so that demonstrate that the arctic ice cap, which for most of the last three million years has been the size of the lower 48 states, has shrunk by 40 percent.\n"]} -{"dataset_name": "IWSLT2017", "split": "test", "NUM_LINES": 1568, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Vor einigen Jahren, hier bei TED, stellte Peter Skillman einen Design-Wettbewerb namens \"Die Marshmallow-Herausforderung\" vor.\n","Several years ago here at TED, Peter Skillman introduced a design challenge called the marshmallow challenge.\n"]} -{"dataset_name": "WMT14", "split": "train", "NUM_LINES": 4500966, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Wiederaufnahme der Sitzungsperiode\n", "Res@@ um@@ ption of the session\n"]} -{"dataset_name": "WMT14", "split": "valid", "NUM_LINES": 3000, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Eine repub@@ li@@ kanische Strategie , um der Wieder@@ wahl von Obama entgegen@@ zu@@ treten\n", "A Republic@@ an strategy to counter the re-@@ election of Obama\n"]} -{"dataset_name": "WMT14", "split": "test", "NUM_LINES": 3003, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Gut@@ ach : Noch mehr Sicherheit f\u00fcr Fu\u00dfg\u00e4n@@ ger\n", "Gut@@ ach : Incre@@ ased safety for pedestri@@ ans\n"]} -{"dataset_name": "WikiText2", "split": "train", "NUM_LINES": 36718, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"} -{"dataset_name": "WikiText2", "split": "valid", "NUM_LINES": 3760, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"} -{"dataset_name": "WikiText2", "split": "test", "NUM_LINES": 4358, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"} -{"dataset_name": "WikiText103", "split": "train", "NUM_LINES": 1801350, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"} -{"dataset_name": "WikiText103", "split": "valid", "NUM_LINES": 3760, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"} -{"dataset_name": "WikiText103", "split": "test", "NUM_LINES": 4358, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"} -{"dataset_name": "PennTreebank", "split": "train", "NUM_LINES": 42068, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memotec mlx nahb punts rake regatta rubens sim snack-food ssangyong swapo wachter \n"} -{"dataset_name": "PennTreebank", "split": "valid", "NUM_LINES": 3370, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " consumers may want to move their telephones a little closer to the tv set \n"} -{"dataset_name": "PennTreebank", "split": "test", "NUM_LINES": 3761, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " no it was n't black monday \n"} -{"dataset_name": "SQuAD1", "split": "train", "NUM_LINES": 87599, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": ["Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.", "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?", ["Saint Bernadette Soubirous"], [515]]} -{"dataset_name": "SQuAD1", "split": "dev", "NUM_LINES": 10570, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": ["Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50.", "Which NFL team represented the AFC at Super Bowl 50?", ["Denver Broncos", "Denver Broncos", "Denver Broncos"], [177, 177, 177]]} -{"dataset_name": "SQuAD2", "split": "train", "NUM_LINES": 130319, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": ["Beyonc\u00e9 Giselle Knowles-Carter (/bi\u02d0\u02c8j\u0252nse\u026a/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyonc\u00e9's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles \"Crazy in Love\" and \"Baby Boy\".", "When did Beyonce start becoming popular?", ["in the late 1990s"], [269]]} -{"dataset_name": "SQuAD2", "split": "dev", "NUM_LINES": 11873, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": ["The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.", "In what country is Normandy located?", ["France", "France", "France", "France"], [159, 159, 159, 159]]} -{"dataset_name": "EnWik9", "split": "train", "NUM_LINES": 13147026, "MD5": "3e773f8a1577fda2e27f871ca17f31fd", "URL": "http://mattmahoney.net/dc/enwik9.zip", "first_line": "\n"} +{"meta_data": +[{"dataset_name": "IMDB", "split": "train", "NUM_LINES": 25000, "MD5": "7c2ac02c03563afcf9b574c7e56c153a", "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", "first_line": ["neg", "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.

The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.

What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.

I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot."]}, +{"dataset_name": "IMDB", "split": "test", "NUM_LINES": 25000, "MD5": "7c2ac02c03563afcf9b574c7e56c153a", "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", "first_line": ["neg", "I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. It's not. It's clich\u00e9d and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It's really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it's rubbish as they have to always say \"Gene Roddenberry's Earth...\" otherwise people would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again."]}, +{"dataset_name": "AG_NEWS", "split": "train", "NUM_LINES": 120000, "MD5": "b1a00f826fdfbd249f79597b59e1dc12", "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv", "first_line": [3, "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green again."]}, +{"dataset_name": "AG_NEWS", "split": "test", "NUM_LINES": 7600, "MD5": "d52ea96a97a2d943681189a97654912d", "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv", "first_line": [3, "Fears for T N pension after talks Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul."]}, +{"dataset_name": "SogouNews", "split": "train", "NUM_LINES": 450000, "MD5": "0c1700ba70b73f964dd8de569d3fd03e", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", "first_line": [4, "2008 di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n me3i nv3 mo2 te4 2008di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n yu2 15 ri4 za4i qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n she4ng da4 ka1i mu4 . be3n ci4 che1 zha3n jia1ng chi2 xu4 da4o be3n yue4 19 ri4 . ji1n nia2n qi1ng da3o guo2 ji4 che1 zha3n shi4 li4 nia2n da3o che2ng che1 zha3n gui1 mo2 zui4 da4 di2 yi1 ci4 , shi3 yo4ng lia3o qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n di2 qua2n bu4 shi4 ne4i wa4i zha3n gua3n . yi3 xia4 we2i xia4n cha3ng mo2 te4 tu2 pia4n ."]}, +{"dataset_name": "SogouNews", "split": "test", "NUM_LINES": 60000, "MD5": "0c1700ba70b73f964dd8de569d3fd03e", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", "first_line": [1, " ti3 ca1o shi4 jie4 be1i : che2ng fe1i na2 pi2ng he2ng mu4 zi4 yo2u ca1o ji1n pa2i su4 du4 : ( shuo1 mi2ng : dia3n ji1 zi4 do4ng bo1 fa4ng )\\n shuo1 mi2ng : dia3n ji1 ga1i a4n niu3 , xua3n ze2 yi1 lu4n ta2n ji2 ke3 "]}, +{"dataset_name": "DBpedia", "split": "train", "NUM_LINES": 560000, "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", "first_line": [1, "E. D. Abbott Ltd Abbott of Farnham E D Abbott Limited was a British coachbuilding business based in Farnham Surrey trading under that name from 1929. A major part of their output was under sub-contract to motor vehicle manufacturers. Their business closed in 1972."]}, +{"dataset_name": "DBpedia", "split": "test", "NUM_LINES": 70000, "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", "first_line": [1, "TY KU TY KU /ta\u026aku\u02d0/ is an American alcoholic beverage company that specializes in sake and other spirits. The privately-held company was founded in 2004 and is headquartered in New York City New York. While based in New York TY KU's beverages are made in Japan through a joint venture with two sake breweries. Since 2011 TY KU's growth has extended its products into all 50 states."]}, +{"dataset_name": "YelpReviewPolarity", "split": "train", "NUM_LINES": 560000, "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", "first_line": [1, "Unfortunately, the frustration of being Dr. Goldberg's patient is a repeat of the experience I've had with so many other doctors in NYC -- good doctor, terrible staff. It seems that his staff simply never answers the phone. It usually takes 2 hours of repeated calling to get an answer. Who has time for that or wants to deal with it? I have run into this problem with many other doctors and I just don't get it. You have office workers, you have patients with medical needs, why isn't anyone answering the phone? It's incomprehensible and not work the aggravation. It's with regret that I feel that I have to give Dr. Goldberg 2 stars."]}, +{"dataset_name": "YelpReviewPolarity", "split": "test", "NUM_LINES": 38000, "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", "first_line": [2, "Contrary to other reviews, I have zero complaints about the service or the prices. I have been getting tire service here for the past 5 years now, and compared to my experience with places like Pep Boys, these guys are experienced and know what they're doing. \\nAlso, this is one place that I do not feel like I am being taken advantage of, just because of my gender. Other auto mechanics have been notorious for capitalizing on my ignorance of cars, and have sucked my bank account dry. But here, my service and road coverage has all been well explained - and let up to me to decide. \\nAnd they just renovated the waiting room. It looks a lot better than it did in previous years."]}, +{"dataset_name": "YelpReviewFull", "split": "train", "NUM_LINES": 650000, "MD5": "f7ddfafed1033f68ec72b9267863af6c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", "first_line": [5, "dr. goldberg offers everything i look for in a general practitioner. he's nice and easy to talk to without being patronizing; he's always on time in seeing his patients; he's affiliated with a top-notch hospital (nyu) which my parents have explained to me is very important in case something happens and you need surgery; and you can get referrals to see specialists without having to see him first. really, what more do you need? i'm sitting here trying to think of any complaints i have about him, but i'm really drawing a blank."]}, +{"dataset_name": "YelpReviewFull", "split": "test", "NUM_LINES": 50000, "MD5": "f7ddfafed1033f68ec72b9267863af6c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", "first_line": [1, "I got 'new' tires from them and within two weeks got a flat. I took my car to a local mechanic to see if i could get the hole patched, but they said the reason I had a flat was because the previous patch had blown - WAIT, WHAT? I just got the tire and never needed to have it patched? This was supposed to be a new tire. \\nI took the tire over to Flynn's and they told me that someone punctured my tire, then tried to patch it. So there are resentful tire slashers? I find that very unlikely. After arguing with the guy and telling him that his logic was far fetched he said he'd give me a new tire \\\"this time\\\". \\nI will never go back to Flynn's b/c of the way this guy treated me and the simple fact that they gave me a used tire!"]}, +{"dataset_name": "YahooAnswers", "split": "train", "NUM_LINES": 1400000, "MD5": "f3f9899b997a42beb24157e62e3eea8d", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", "first_line": [5, "why doesn't an optical mouse work on a glass table? or even on some surfaces? Optical mice use an LED and a camera to rapidly capture images of the surface beneath the mouse. The infomation from the camera is analyzed by a DSP (Digital Signal Processor) and used to detect imperfections in the underlying surface and determine motion. Some materials, such as glass, mirrors or other very shiny, uniform surfaces interfere with the ability of the DSP to accurately analyze the surface beneath the mouse. \\nSince glass is transparent and very uniform, the mouse is unable to pick up enough imperfections in the underlying surface to determine motion. Mirrored surfaces are also a problem, since they constantly reflect back the same image, causing the DSP not to recognize motion properly. When the system is unable to see surface changes associated with movement, the mouse will not work properly."]}, +{"dataset_name": "YahooAnswers", "split": "test", "NUM_LINES": 60000, "MD5": "f3f9899b997a42beb24157e62e3eea8d", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", "first_line": [9, "What makes friendship click? How does the spark keep going? good communication is what does it. Can you move beyond small talk and say what's really on your mind. If you start doing this, my expereince is that potentially good friends will respond or shun you. Then you know who the really good friends are."]}, +{"dataset_name": "AmazonReviewPolarity", "split": "train", "NUM_LINES": 3600000, "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", "first_line": [2, "Stuning even for the non-gamer This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen! ^_^"]}, +{"dataset_name": "AmazonReviewPolarity", "split": "test", "NUM_LINES": 400000, "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", "first_line": [2, "Great CD My lovely Pat has one of the GREAT voices of her generation. I have listened to this CD for YEARS and I still LOVE IT. When I'm in a good mood it makes me feel better. A bad mood just evaporates like sugar in the rain. This CD just oozes LIFE. Vocals are jusat STUUNNING and lyrics just kill. One of life's hidden gems. This is a desert isle CD in my book. Why she never made it big is just beyond me. Everytime I play this, no matter black, white, young, old, male, female EVERYBODY says one thing \"Who was that singing ?\""]}, +{"dataset_name": "AmazonReviewFull", "split": "train", "NUM_LINES": 3000000, "MD5": "57d28bd5d930e772930baddf36641c7c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", "first_line": [3, "more like funchuck Gave this to my dad for a gag gift after directing \"Nunsense,\" he got a reall kick out of it!"]}, +{"dataset_name": "AmazonReviewFull", "split": "test", "NUM_LINES": 650000, "MD5": "57d28bd5d930e772930baddf36641c7c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", "first_line": [1, "mens ultrasheer This model may be ok for sedentary types, but I'm active and get around alot in my job - consistently found these stockings rolled up down by my ankles! Not Good!! Solution: go with the standard compression stocking, 20-30, stock #114622. Excellent support, stays up and gives me what I need. Both pair of these also tore as I struggled to pull them up all the time. Good riddance/bad investment!"]}, +{"dataset_name": "UDPOS", "split": "train", "NUM_LINES": 12543, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["Al", "-", "Zaman", ":", "American", "forces", "killed", "Shaikh", "Abdullah", "al", "-", "Ani", ",", "the", "preacher", "at", "the", "mosque", "in", "the", "town", "of", "Qaim", ",", "near", "the", "Syrian", "border", "."], ["PROPN", "PUNCT", "PROPN", "PUNCT", "ADJ", "NOUN", "VERB", "PROPN", "PROPN", "PROPN", "PUNCT", "PROPN", "PUNCT", "DET", "NOUN", "ADP", "DET", "NOUN", "ADP", "DET", "NOUN", "ADP", "PROPN", "PUNCT", "ADP", "DET", "ADJ", "NOUN", "PUNCT"], ["NNP", "HYPH", "NNP", ":", "JJ", "NNS", "VBD", "NNP", "NNP", "NNP", "HYPH", "NNP", ",", "DT", "NN", "IN", "DT", "NN", "IN", "DT", "NN", "IN", "NNP", ",", "IN", "DT", "JJ", "NN", "."]]}, +{"dataset_name": "UDPOS", "split": "valid", "NUM_LINES": 2002, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["From", "the", "AP", "comes", "this", "story", ":"], ["ADP", "DET", "PROPN", "VERB", "DET", "NOUN", "PUNCT"], ["IN", "DT", "NNP", "VBZ", "DT", "NN", ":"]]}, +{"dataset_name": "UDPOS", "split": "test", "NUM_LINES": 2077, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["What", "if", "Google", "Morphed", "Into", "GoogleOS", "?"], ["PRON", "SCONJ", "PROPN", "VERB", "ADP", "PROPN", "PUNCT"], ["WP", "IN", "NNP", "VBD", "IN", "NNP", "."]]}, +{"dataset_name": "CoNLL2000Chunking", "split": "train", "NUM_LINES": 8936, "MD5": {"train": "6969c2903a1f19a83569db643e43dcc8", "test": "a916e1c2d83eb3004b38fc6fcd628939"}, "URL": {"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz"}, "first_line": [["Confidence", "in", "the", "pound", "is", "widely", "expected", "to", "take", "another", "sharp", "dive", "if", "trade", "figures", "for", "September", ",", "due", "for", "release", "tomorrow", ",", "fail", "to", "show", "a", "substantial", "improvement", "from", "July", "and", "August", "'s", "near-record", "deficits", "."], ["NN", "IN", "DT", "NN", "VBZ", "RB", "VBN", "TO", "VB", "DT", "JJ", "NN", "IN", "NN", "NNS", "IN", "NNP", ",", "JJ", "IN", "NN", "NN", ",", "VB", "TO", "VB", "DT", "JJ", "NN", "IN", "NNP", "CC", "NNP", "POS", "JJ", "NNS", "."], ["B-NP", "B-PP", "B-NP", "I-NP", "B-VP", "I-VP", "I-VP", "I-VP", "I-VP", "B-NP", "I-NP", "I-NP", "B-SBAR", "B-NP", "I-NP", "B-PP", "B-NP", "O", "B-ADJP", "B-PP", "B-NP", "B-NP", "O", "B-VP", "I-VP", "I-VP", "B-NP", "I-NP", "I-NP", "B-PP", "B-NP", "I-NP", "I-NP", "B-NP", "I-NP", "I-NP", "O"]]}, +{"dataset_name": "CoNLL2000Chunking", "split": "test", "NUM_LINES": 2012, "MD5": {"train": "6969c2903a1f19a83569db643e43dcc8", "test": "a916e1c2d83eb3004b38fc6fcd628939"}, "URL": {"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz"}, "first_line": [["Rockwell", "International", "Corp.", "'s", "Tulsa", "unit", "said", "it", "signed", "a", "tentative", "agreement", "extending", "its", "contract", "with", "Boeing", "Co.", "to", "provide", "structural", "parts", "for", "Boeing", "'s", "747", "jetliners", "."], ["NNP", "NNP", "NNP", "POS", "NNP", "NN", "VBD", "PRP", "VBD", "DT", "JJ", "NN", "VBG", "PRP$", "NN", "IN", "NNP", "NNP", "TO", "VB", "JJ", "NNS", "IN", "NNP", "POS", "CD", "NNS", "."], ["B-NP", "I-NP", "I-NP", "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "B-PP", "B-NP", "I-NP", "B-VP", "I-VP", "B-NP", "I-NP", "B-PP", "B-NP", "B-NP", "I-NP", "I-NP", "O"]]}, +{"dataset_name": "Multi30k", "split": "train", "NUM_LINES": 29000, "MD5": ["d9a5fc268917725a2b0efce3a0cc8607", "81ff90b99829c0cd4b1b587d394afd39", "0065d13af80720a55ca8153d126e6627", "6cb767741dcad3931f966fefbc05203f", "62f36422bfab90fb42a560546b704009", "540da4566bb6dd35fdbc720218b742b7", "613eb4a3f0c2b13f0871ced946851b0e", "d848fe0ae8b9447209fb49c5c31cb3d2", "abc13b4042f4fef1cdff6de3b6c53b71", "cbf5bfc2147706f228d288e1b18bf4af", "bdfe4222f4692ccaa1e3389460f0890e", "0e1ee2b4145795bd180b193424db204b", "1cff688d1aadef7fdb22e9ad27d6fd2c", "3e10289959d0059952511c31df3c7550"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.en.gz"], "first_line": ["Zwei junge wei\u00dfe M\u00e4nner sind im Freien in der N\u00e4he vieler B\u00fcsche.\n", "Two young, White males are outside near many bushes.\n"]}, +{"dataset_name": "Multi30k", "split": "valid", "NUM_LINES": 1014, "MD5": ["83cdc082f646b769095615384cf5c0ca", "6e0e229eb049e3fc99a1ef02fb2d5f91", "2b69aa9253948ac9f67e94917272dd40", "93fc564584b7e5ba410c761ea5a1c682", "b26486ede1d4436d5acf6e38c65bb44d", "16165248083beacebfe18866d5f4f0ae", "7180780822d4b600eb81c1ccf171c230", "8edb43c90cae66ec762748a968089b99", "873a377a348713d3ab84db1fb57cdede", "df57faf5f00d434d2559c021ef55f1aa", "9077a5127480cc799116384de501bd70", "c1f697c3b6dfb7305349db34e26b45fc", "acb5ea26a577ceccfae6337181c31716", "680816e0938fea5cf5331444bc09a4cf"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.en.gz"], "first_line": ["Eine Gruppe von M\u00e4nnern l\u00e4dt Baumwolle auf einen Lastwagen\n", "A group of men are loading cotton onto a truck\n"]}, +{"dataset_name": "Multi30k", "split": "test", "NUM_LINES": 1000, "MD5": ["3104872229daa1bef3b401d44dd2220b", "efd67d314d98489b716b145475101932", "6a8d5c87f6ae19e3d35681aa6fd16571", "e8cd6ec2bc8a11fc846fa48a46e3d0bb", "ff2c0fcb4893a13bd73414306bc250ae", "005396bac545d880abe6f00bbb7dbbb4", "a7b684e0edbef1d4a23660c8e8e743fd", "a152878809942757a55ce087073486b8", "08dc7cd4a662f31718412de95ca9bfe3", "cb09af7d2b501f9112f2d6a59fa1360d", "4995d10954a804d3cdfd907b9fd093e8", "ac0c72653c140dd96707212a1baa4278", "6dfb42cae4e4fd9a3c40e62ff5398a55", "ece8cec6b87bf00dd12607f3062dae4c", "9a7e7b2dcc33135a32cd621c3b37d2d8", "7d5ef0f069ee2d74dc2fdc6b46cd47fa", "eec05227daba4bb8f3f8f25b1cb335f4", "9318fa08c0c0b96114eadb10eb2fc633", "088ec0765fa213a0eb937a62adfd4996", "5f7c8d0be0ac739856b47d32a9434998", "713ed720636622a54546d5f14f88b00f"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2018_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.en.gz"], "first_line": ["Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.\n", "A man in an orange hat starring at something.\n"]}, +{"dataset_name": "IWSLT2016", "split": "train", "NUM_LINES": 196884, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["David Gallo: Das ist Bill Lange. Ich bin Dave Gallo.\n", "David Gallo: This is Bill Lange. I'm Dave Gallo.\n"]}, +{"dataset_name": "IWSLT2016", "split": "valid", "NUM_LINES": 993, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["Als ich 11 Jahre alt war, wurde ich eines Morgens von den Kl\u00e4ngen heller Freude geweckt.\n", "When I was 11, I remember waking up one morning to the sound of joy in my house.\n"]}, +{"dataset_name": "IWSLT2016", "split": "test", "NUM_LINES": 1305, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["Als ich in meinen 20ern war, hatte ich meine erste Psychotherapie-Patientin.\n", "When I was in my 20s, I saw my very first psychotherapy client.\n"]}, +{"dataset_name": "IWSLT2017", "split": "train", "NUM_LINES": 206112, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Vielen Dank, Chris.\n", "Thank you so much, Chris.\n"]}, +{"dataset_name": "IWSLT2017", "split": "valid", "NUM_LINES": 888, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Letztes Jahr habe ich diese beiden Folien gezeigt, um zu veranschaulichen, dass die arktische Eiskappe, die f\u00fcr ann\u00e4hernd drei Millionen Jahre die Gr\u00f6sse der unteren 48 Staaten hatte, um 40 Prozent geschrumpft ist.\n","Last year I showed these two slides so that demonstrate that the arctic ice cap, which for most of the last three million years has been the size of the lower 48 states, has shrunk by 40 percent.\n"]}, +{"dataset_name": "IWSLT2017", "split": "test", "NUM_LINES": 1568, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Vor einigen Jahren, hier bei TED, stellte Peter Skillman einen Design-Wettbewerb namens \"Die Marshmallow-Herausforderung\" vor.\n","Several years ago here at TED, Peter Skillman introduced a design challenge called the marshmallow challenge.\n"]}, +{"dataset_name": "WMT14", "split": "train", "NUM_LINES": 4500966, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Wiederaufnahme der Sitzungsperiode\n", "Res@@ um@@ ption of the session\n"]}, +{"dataset_name": "WMT14", "split": "valid", "NUM_LINES": 3000, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Eine repub@@ li@@ kanische Strategie , um der Wieder@@ wahl von Obama entgegen@@ zu@@ treten\n", "A Republic@@ an strategy to counter the re-@@ election of Obama\n"]}, +{"dataset_name": "WMT14", "split": "test", "NUM_LINES": 3003, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Gut@@ ach : Noch mehr Sicherheit f\u00fcr Fu\u00dfg\u00e4n@@ ger\n", "Gut@@ ach : Incre@@ ased safety for pedestri@@ ans\n"]}, +{"dataset_name": "WikiText2", "split": "train", "NUM_LINES": 36718, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"}, +{"dataset_name": "WikiText2", "split": "valid", "NUM_LINES": 3760, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"}, +{"dataset_name": "WikiText2", "split": "test", "NUM_LINES": 4358, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"}, +{"dataset_name": "WikiText103", "split": "train", "NUM_LINES": 1801350, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"}, +{"dataset_name": "WikiText103", "split": "valid", "NUM_LINES": 3760, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"}, +{"dataset_name": "WikiText103", "split": "test", "NUM_LINES": 4358, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"}, +{"dataset_name": "PennTreebank", "split": "train", "NUM_LINES": 42068, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memotec mlx nahb punts rake regatta rubens sim snack-food ssangyong swapo wachter \n"}, +{"dataset_name": "PennTreebank", "split": "valid", "NUM_LINES": 3370, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " consumers may want to move their telephones a little closer to the tv set \n"}, +{"dataset_name": "PennTreebank", "split": "test", "NUM_LINES": 3761, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " no it was n't black monday \n"}, +{"dataset_name": "SQuAD1", "split": "train", "NUM_LINES": 87599, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": ["Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.", "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?", ["Saint Bernadette Soubirous"], [515]]}, +{"dataset_name": "SQuAD1", "split": "dev", "NUM_LINES": 10570, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": ["Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50.", "Which NFL team represented the AFC at Super Bowl 50?", ["Denver Broncos", "Denver Broncos", "Denver Broncos"], [177, 177, 177]]}, +{"dataset_name": "SQuAD2", "split": "train", "NUM_LINES": 130319, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": ["Beyonc\u00e9 Giselle Knowles-Carter (/bi\u02d0\u02c8j\u0252nse\u026a/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyonc\u00e9's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles \"Crazy in Love\" and \"Baby Boy\".", "When did Beyonce start becoming popular?", ["in the late 1990s"], [269]]}, +{"dataset_name": "SQuAD2", "split": "dev", "NUM_LINES": 11873, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": ["The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.", "In what country is Normandy located?", ["France", "France", "France", "France"], [159, 159, 159, 159]]}, +{"dataset_name": "EnWik9", "split": "train", "NUM_LINES": 13147026, "MD5": "3e773f8a1577fda2e27f871ca17f31fd", "URL": "http://mattmahoney.net/dc/enwik9.zip", "first_line": "\n"}] +} From 4e3ba74dd67b85d77da047a52ac6bf3a8a7120f1 Mon Sep 17 00:00:00 2001 From: Guanheng Zhang Date: Mon, 8 Mar 2021 14:09:25 -0800 Subject: [PATCH 2/3] update parameterized --- test/common/parameterized_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/common/parameterized_utils.py b/test/common/parameterized_utils.py index 85d5bcb0f5..6f77b469f3 100644 --- a/test/common/parameterized_utils.py +++ b/test/common/parameterized_utils.py @@ -14,4 +14,4 @@ def get_asset_path(*paths): def load_params(*paths): with open(get_asset_path(*paths), 'r') as file: - return [param(json.loads(line)) for line in file] + return [param(line) for line in json.load(file)['meta_data']] From 26482770d83b722216818d0bcb9396b86a365fa8 Mon Sep 17 00:00:00 2001 From: Guanheng Zhang Date: Mon, 8 Mar 2021 14:15:53 -0800 Subject: [PATCH 3/3] fix lint --- test/asset/raw_datasets.json | 1104 ++++++++++++++++++++++++++++++++-- 1 file changed, 1054 insertions(+), 50 deletions(-) diff --git a/test/asset/raw_datasets.json b/test/asset/raw_datasets.json index 20af644e3b..9e40872299 100644 --- a/test/asset/raw_datasets.json +++ b/test/asset/raw_datasets.json @@ -1,51 +1,1055 @@ -{"meta_data": -[{"dataset_name": "IMDB", "split": "train", "NUM_LINES": 25000, "MD5": "7c2ac02c03563afcf9b574c7e56c153a", "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", "first_line": ["neg", "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.

The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.

What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.

I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot."]}, -{"dataset_name": "IMDB", "split": "test", "NUM_LINES": 25000, "MD5": "7c2ac02c03563afcf9b574c7e56c153a", "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", "first_line": ["neg", "I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. It's not. It's clich\u00e9d and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It's really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it's rubbish as they have to always say \"Gene Roddenberry's Earth...\" otherwise people would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again."]}, -{"dataset_name": "AG_NEWS", "split": "train", "NUM_LINES": 120000, "MD5": "b1a00f826fdfbd249f79597b59e1dc12", "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv", "first_line": [3, "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green again."]}, -{"dataset_name": "AG_NEWS", "split": "test", "NUM_LINES": 7600, "MD5": "d52ea96a97a2d943681189a97654912d", "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv", "first_line": [3, "Fears for T N pension after talks Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul."]}, -{"dataset_name": "SogouNews", "split": "train", "NUM_LINES": 450000, "MD5": "0c1700ba70b73f964dd8de569d3fd03e", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", "first_line": [4, "2008 di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n me3i nv3 mo2 te4 2008di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n yu2 15 ri4 za4i qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n she4ng da4 ka1i mu4 . be3n ci4 che1 zha3n jia1ng chi2 xu4 da4o be3n yue4 19 ri4 . ji1n nia2n qi1ng da3o guo2 ji4 che1 zha3n shi4 li4 nia2n da3o che2ng che1 zha3n gui1 mo2 zui4 da4 di2 yi1 ci4 , shi3 yo4ng lia3o qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n di2 qua2n bu4 shi4 ne4i wa4i zha3n gua3n . yi3 xia4 we2i xia4n cha3ng mo2 te4 tu2 pia4n ."]}, -{"dataset_name": "SogouNews", "split": "test", "NUM_LINES": 60000, "MD5": "0c1700ba70b73f964dd8de569d3fd03e", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", "first_line": [1, " ti3 ca1o shi4 jie4 be1i : che2ng fe1i na2 pi2ng he2ng mu4 zi4 yo2u ca1o ji1n pa2i su4 du4 : ( shuo1 mi2ng : dia3n ji1 zi4 do4ng bo1 fa4ng )\\n shuo1 mi2ng : dia3n ji1 ga1i a4n niu3 , xua3n ze2 yi1 lu4n ta2n ji2 ke3 "]}, -{"dataset_name": "DBpedia", "split": "train", "NUM_LINES": 560000, "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", "first_line": [1, "E. D. Abbott Ltd Abbott of Farnham E D Abbott Limited was a British coachbuilding business based in Farnham Surrey trading under that name from 1929. A major part of their output was under sub-contract to motor vehicle manufacturers. Their business closed in 1972."]}, -{"dataset_name": "DBpedia", "split": "test", "NUM_LINES": 70000, "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", "first_line": [1, "TY KU TY KU /ta\u026aku\u02d0/ is an American alcoholic beverage company that specializes in sake and other spirits. The privately-held company was founded in 2004 and is headquartered in New York City New York. While based in New York TY KU's beverages are made in Japan through a joint venture with two sake breweries. Since 2011 TY KU's growth has extended its products into all 50 states."]}, -{"dataset_name": "YelpReviewPolarity", "split": "train", "NUM_LINES": 560000, "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", "first_line": [1, "Unfortunately, the frustration of being Dr. Goldberg's patient is a repeat of the experience I've had with so many other doctors in NYC -- good doctor, terrible staff. It seems that his staff simply never answers the phone. It usually takes 2 hours of repeated calling to get an answer. Who has time for that or wants to deal with it? I have run into this problem with many other doctors and I just don't get it. You have office workers, you have patients with medical needs, why isn't anyone answering the phone? It's incomprehensible and not work the aggravation. It's with regret that I feel that I have to give Dr. Goldberg 2 stars."]}, -{"dataset_name": "YelpReviewPolarity", "split": "test", "NUM_LINES": 38000, "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", "first_line": [2, "Contrary to other reviews, I have zero complaints about the service or the prices. I have been getting tire service here for the past 5 years now, and compared to my experience with places like Pep Boys, these guys are experienced and know what they're doing. \\nAlso, this is one place that I do not feel like I am being taken advantage of, just because of my gender. Other auto mechanics have been notorious for capitalizing on my ignorance of cars, and have sucked my bank account dry. But here, my service and road coverage has all been well explained - and let up to me to decide. \\nAnd they just renovated the waiting room. It looks a lot better than it did in previous years."]}, -{"dataset_name": "YelpReviewFull", "split": "train", "NUM_LINES": 650000, "MD5": "f7ddfafed1033f68ec72b9267863af6c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", "first_line": [5, "dr. goldberg offers everything i look for in a general practitioner. he's nice and easy to talk to without being patronizing; he's always on time in seeing his patients; he's affiliated with a top-notch hospital (nyu) which my parents have explained to me is very important in case something happens and you need surgery; and you can get referrals to see specialists without having to see him first. really, what more do you need? i'm sitting here trying to think of any complaints i have about him, but i'm really drawing a blank."]}, -{"dataset_name": "YelpReviewFull", "split": "test", "NUM_LINES": 50000, "MD5": "f7ddfafed1033f68ec72b9267863af6c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", "first_line": [1, "I got 'new' tires from them and within two weeks got a flat. I took my car to a local mechanic to see if i could get the hole patched, but they said the reason I had a flat was because the previous patch had blown - WAIT, WHAT? I just got the tire and never needed to have it patched? This was supposed to be a new tire. \\nI took the tire over to Flynn's and they told me that someone punctured my tire, then tried to patch it. So there are resentful tire slashers? I find that very unlikely. After arguing with the guy and telling him that his logic was far fetched he said he'd give me a new tire \\\"this time\\\". \\nI will never go back to Flynn's b/c of the way this guy treated me and the simple fact that they gave me a used tire!"]}, -{"dataset_name": "YahooAnswers", "split": "train", "NUM_LINES": 1400000, "MD5": "f3f9899b997a42beb24157e62e3eea8d", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", "first_line": [5, "why doesn't an optical mouse work on a glass table? or even on some surfaces? Optical mice use an LED and a camera to rapidly capture images of the surface beneath the mouse. The infomation from the camera is analyzed by a DSP (Digital Signal Processor) and used to detect imperfections in the underlying surface and determine motion. Some materials, such as glass, mirrors or other very shiny, uniform surfaces interfere with the ability of the DSP to accurately analyze the surface beneath the mouse. \\nSince glass is transparent and very uniform, the mouse is unable to pick up enough imperfections in the underlying surface to determine motion. Mirrored surfaces are also a problem, since they constantly reflect back the same image, causing the DSP not to recognize motion properly. When the system is unable to see surface changes associated with movement, the mouse will not work properly."]}, -{"dataset_name": "YahooAnswers", "split": "test", "NUM_LINES": 60000, "MD5": "f3f9899b997a42beb24157e62e3eea8d", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", "first_line": [9, "What makes friendship click? How does the spark keep going? good communication is what does it. Can you move beyond small talk and say what's really on your mind. If you start doing this, my expereince is that potentially good friends will respond or shun you. Then you know who the really good friends are."]}, -{"dataset_name": "AmazonReviewPolarity", "split": "train", "NUM_LINES": 3600000, "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", "first_line": [2, "Stuning even for the non-gamer This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen! ^_^"]}, -{"dataset_name": "AmazonReviewPolarity", "split": "test", "NUM_LINES": 400000, "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", "first_line": [2, "Great CD My lovely Pat has one of the GREAT voices of her generation. I have listened to this CD for YEARS and I still LOVE IT. When I'm in a good mood it makes me feel better. A bad mood just evaporates like sugar in the rain. This CD just oozes LIFE. Vocals are jusat STUUNNING and lyrics just kill. One of life's hidden gems. This is a desert isle CD in my book. Why she never made it big is just beyond me. Everytime I play this, no matter black, white, young, old, male, female EVERYBODY says one thing \"Who was that singing ?\""]}, -{"dataset_name": "AmazonReviewFull", "split": "train", "NUM_LINES": 3000000, "MD5": "57d28bd5d930e772930baddf36641c7c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", "first_line": [3, "more like funchuck Gave this to my dad for a gag gift after directing \"Nunsense,\" he got a reall kick out of it!"]}, -{"dataset_name": "AmazonReviewFull", "split": "test", "NUM_LINES": 650000, "MD5": "57d28bd5d930e772930baddf36641c7c", "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", "first_line": [1, "mens ultrasheer This model may be ok for sedentary types, but I'm active and get around alot in my job - consistently found these stockings rolled up down by my ankles! Not Good!! Solution: go with the standard compression stocking, 20-30, stock #114622. Excellent support, stays up and gives me what I need. Both pair of these also tore as I struggled to pull them up all the time. Good riddance/bad investment!"]}, -{"dataset_name": "UDPOS", "split": "train", "NUM_LINES": 12543, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["Al", "-", "Zaman", ":", "American", "forces", "killed", "Shaikh", "Abdullah", "al", "-", "Ani", ",", "the", "preacher", "at", "the", "mosque", "in", "the", "town", "of", "Qaim", ",", "near", "the", "Syrian", "border", "."], ["PROPN", "PUNCT", "PROPN", "PUNCT", "ADJ", "NOUN", "VERB", "PROPN", "PROPN", "PROPN", "PUNCT", "PROPN", "PUNCT", "DET", "NOUN", "ADP", "DET", "NOUN", "ADP", "DET", "NOUN", "ADP", "PROPN", "PUNCT", "ADP", "DET", "ADJ", "NOUN", "PUNCT"], ["NNP", "HYPH", "NNP", ":", "JJ", "NNS", "VBD", "NNP", "NNP", "NNP", "HYPH", "NNP", ",", "DT", "NN", "IN", "DT", "NN", "IN", "DT", "NN", "IN", "NNP", ",", "IN", "DT", "JJ", "NN", "."]]}, -{"dataset_name": "UDPOS", "split": "valid", "NUM_LINES": 2002, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["From", "the", "AP", "comes", "this", "story", ":"], ["ADP", "DET", "PROPN", "VERB", "DET", "NOUN", "PUNCT"], ["IN", "DT", "NNP", "VBZ", "DT", "NN", ":"]]}, -{"dataset_name": "UDPOS", "split": "test", "NUM_LINES": 2077, "MD5": "bdcac7c52d934656bae1699541424545", "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", "first_line": [["What", "if", "Google", "Morphed", "Into", "GoogleOS", "?"], ["PRON", "SCONJ", "PROPN", "VERB", "ADP", "PROPN", "PUNCT"], ["WP", "IN", "NNP", "VBD", "IN", "NNP", "."]]}, -{"dataset_name": "CoNLL2000Chunking", "split": "train", "NUM_LINES": 8936, "MD5": {"train": "6969c2903a1f19a83569db643e43dcc8", "test": "a916e1c2d83eb3004b38fc6fcd628939"}, "URL": {"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz"}, "first_line": [["Confidence", "in", "the", "pound", "is", "widely", "expected", "to", "take", "another", "sharp", "dive", "if", "trade", "figures", "for", "September", ",", "due", "for", "release", "tomorrow", ",", "fail", "to", "show", "a", "substantial", "improvement", "from", "July", "and", "August", "'s", "near-record", "deficits", "."], ["NN", "IN", "DT", "NN", "VBZ", "RB", "VBN", "TO", "VB", "DT", "JJ", "NN", "IN", "NN", "NNS", "IN", "NNP", ",", "JJ", "IN", "NN", "NN", ",", "VB", "TO", "VB", "DT", "JJ", "NN", "IN", "NNP", "CC", "NNP", "POS", "JJ", "NNS", "."], ["B-NP", "B-PP", "B-NP", "I-NP", "B-VP", "I-VP", "I-VP", "I-VP", "I-VP", "B-NP", "I-NP", "I-NP", "B-SBAR", "B-NP", "I-NP", "B-PP", "B-NP", "O", "B-ADJP", "B-PP", "B-NP", "B-NP", "O", "B-VP", "I-VP", "I-VP", "B-NP", "I-NP", "I-NP", "B-PP", "B-NP", "I-NP", "I-NP", "B-NP", "I-NP", "I-NP", "O"]]}, -{"dataset_name": "CoNLL2000Chunking", "split": "test", "NUM_LINES": 2012, "MD5": {"train": "6969c2903a1f19a83569db643e43dcc8", "test": "a916e1c2d83eb3004b38fc6fcd628939"}, "URL": {"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz"}, "first_line": [["Rockwell", "International", "Corp.", "'s", "Tulsa", "unit", "said", "it", "signed", "a", "tentative", "agreement", "extending", "its", "contract", "with", "Boeing", "Co.", "to", "provide", "structural", "parts", "for", "Boeing", "'s", "747", "jetliners", "."], ["NNP", "NNP", "NNP", "POS", "NNP", "NN", "VBD", "PRP", "VBD", "DT", "JJ", "NN", "VBG", "PRP$", "NN", "IN", "NNP", "NNP", "TO", "VB", "JJ", "NNS", "IN", "NNP", "POS", "CD", "NNS", "."], ["B-NP", "I-NP", "I-NP", "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "B-PP", "B-NP", "I-NP", "B-VP", "I-VP", "B-NP", "I-NP", "B-PP", "B-NP", "B-NP", "I-NP", "I-NP", "O"]]}, -{"dataset_name": "Multi30k", "split": "train", "NUM_LINES": 29000, "MD5": ["d9a5fc268917725a2b0efce3a0cc8607", "81ff90b99829c0cd4b1b587d394afd39", "0065d13af80720a55ca8153d126e6627", "6cb767741dcad3931f966fefbc05203f", "62f36422bfab90fb42a560546b704009", "540da4566bb6dd35fdbc720218b742b7", "613eb4a3f0c2b13f0871ced946851b0e", "d848fe0ae8b9447209fb49c5c31cb3d2", "abc13b4042f4fef1cdff6de3b6c53b71", "cbf5bfc2147706f228d288e1b18bf4af", "bdfe4222f4692ccaa1e3389460f0890e", "0e1ee2b4145795bd180b193424db204b", "1cff688d1aadef7fdb22e9ad27d6fd2c", "3e10289959d0059952511c31df3c7550"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.en.gz"], "first_line": ["Zwei junge wei\u00dfe M\u00e4nner sind im Freien in der N\u00e4he vieler B\u00fcsche.\n", "Two young, White males are outside near many bushes.\n"]}, -{"dataset_name": "Multi30k", "split": "valid", "NUM_LINES": 1014, "MD5": ["83cdc082f646b769095615384cf5c0ca", "6e0e229eb049e3fc99a1ef02fb2d5f91", "2b69aa9253948ac9f67e94917272dd40", "93fc564584b7e5ba410c761ea5a1c682", "b26486ede1d4436d5acf6e38c65bb44d", "16165248083beacebfe18866d5f4f0ae", "7180780822d4b600eb81c1ccf171c230", "8edb43c90cae66ec762748a968089b99", "873a377a348713d3ab84db1fb57cdede", "df57faf5f00d434d2559c021ef55f1aa", "9077a5127480cc799116384de501bd70", "c1f697c3b6dfb7305349db34e26b45fc", "acb5ea26a577ceccfae6337181c31716", "680816e0938fea5cf5331444bc09a4cf"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.en.gz"], "first_line": ["Eine Gruppe von M\u00e4nnern l\u00e4dt Baumwolle auf einen Lastwagen\n", "A group of men are loading cotton onto a truck\n"]}, -{"dataset_name": "Multi30k", "split": "test", "NUM_LINES": 1000, "MD5": ["3104872229daa1bef3b401d44dd2220b", "efd67d314d98489b716b145475101932", "6a8d5c87f6ae19e3d35681aa6fd16571", "e8cd6ec2bc8a11fc846fa48a46e3d0bb", "ff2c0fcb4893a13bd73414306bc250ae", "005396bac545d880abe6f00bbb7dbbb4", "a7b684e0edbef1d4a23660c8e8e743fd", "a152878809942757a55ce087073486b8", "08dc7cd4a662f31718412de95ca9bfe3", "cb09af7d2b501f9112f2d6a59fa1360d", "4995d10954a804d3cdfd907b9fd093e8", "ac0c72653c140dd96707212a1baa4278", "6dfb42cae4e4fd9a3c40e62ff5398a55", "ece8cec6b87bf00dd12607f3062dae4c", "9a7e7b2dcc33135a32cd621c3b37d2d8", "7d5ef0f069ee2d74dc2fdc6b46cd47fa", "eec05227daba4bb8f3f8f25b1cb335f4", "9318fa08c0c0b96114eadb10eb2fc633", "088ec0765fa213a0eb937a62adfd4996", "5f7c8d0be0ac739856b47d32a9434998", "713ed720636622a54546d5f14f88b00f"], "URL": ["https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.cs.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2018_flickr.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.fr.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.de.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.en.gz", "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.en.gz"], "first_line": ["Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.\n", "A man in an orange hat starring at something.\n"]}, -{"dataset_name": "IWSLT2016", "split": "train", "NUM_LINES": 196884, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["David Gallo: Das ist Bill Lange. Ich bin Dave Gallo.\n", "David Gallo: This is Bill Lange. I'm Dave Gallo.\n"]}, -{"dataset_name": "IWSLT2016", "split": "valid", "NUM_LINES": 993, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["Als ich 11 Jahre alt war, wurde ich eines Morgens von den Kl\u00e4ngen heller Freude geweckt.\n", "When I was 11, I remember waking up one morning to the sound of joy in my house.\n"]}, -{"dataset_name": "IWSLT2016", "split": "test", "NUM_LINES": 1305, "MD5": "c393ed3fc2a1b0f004b3331043f615ae", "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", "first_line": ["Als ich in meinen 20ern war, hatte ich meine erste Psychotherapie-Patientin.\n", "When I was in my 20s, I saw my very first psychotherapy client.\n"]}, -{"dataset_name": "IWSLT2017", "split": "train", "NUM_LINES": 206112, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Vielen Dank, Chris.\n", "Thank you so much, Chris.\n"]}, -{"dataset_name": "IWSLT2017", "split": "valid", "NUM_LINES": 888, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Letztes Jahr habe ich diese beiden Folien gezeigt, um zu veranschaulichen, dass die arktische Eiskappe, die f\u00fcr ann\u00e4hernd drei Millionen Jahre die Gr\u00f6sse der unteren 48 Staaten hatte, um 40 Prozent geschrumpft ist.\n","Last year I showed these two slides so that demonstrate that the arctic ice cap, which for most of the last three million years has been the size of the lower 48 states, has shrunk by 40 percent.\n"]}, -{"dataset_name": "IWSLT2017", "split": "test", "NUM_LINES": 1568, "MD5": "aca701032b1c4411afc4d9fa367796ba", "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", "first_line": ["Vor einigen Jahren, hier bei TED, stellte Peter Skillman einen Design-Wettbewerb namens \"Die Marshmallow-Herausforderung\" vor.\n","Several years ago here at TED, Peter Skillman introduced a design challenge called the marshmallow challenge.\n"]}, -{"dataset_name": "WMT14", "split": "train", "NUM_LINES": 4500966, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Wiederaufnahme der Sitzungsperiode\n", "Res@@ um@@ ption of the session\n"]}, -{"dataset_name": "WMT14", "split": "valid", "NUM_LINES": 3000, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Eine repub@@ li@@ kanische Strategie , um der Wieder@@ wahl von Obama entgegen@@ zu@@ treten\n", "A Republic@@ an strategy to counter the re-@@ election of Obama\n"]}, -{"dataset_name": "WMT14", "split": "test", "NUM_LINES": 3003, "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", "first_line": ["Gut@@ ach : Noch mehr Sicherheit f\u00fcr Fu\u00dfg\u00e4n@@ ger\n", "Gut@@ ach : Incre@@ ased safety for pedestri@@ ans\n"]}, -{"dataset_name": "WikiText2", "split": "train", "NUM_LINES": 36718, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"}, -{"dataset_name": "WikiText2", "split": "valid", "NUM_LINES": 3760, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"}, -{"dataset_name": "WikiText2", "split": "test", "NUM_LINES": 4358, "MD5": "542ccefacc6c27f945fb54453812b3cd", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", "first_line": " \n"}, -{"dataset_name": "WikiText103", "split": "train", "NUM_LINES": 1801350, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"}, -{"dataset_name": "WikiText103", "split": "valid", "NUM_LINES": 3760, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"}, -{"dataset_name": "WikiText103", "split": "test", "NUM_LINES": 4358, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": " \n"}, -{"dataset_name": "PennTreebank", "split": "train", "NUM_LINES": 42068, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memotec mlx nahb punts rake regatta rubens sim snack-food ssangyong swapo wachter \n"}, -{"dataset_name": "PennTreebank", "split": "valid", "NUM_LINES": 3370, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " consumers may want to move their telephones a little closer to the tv set \n"}, -{"dataset_name": "PennTreebank", "split": "test", "NUM_LINES": 3761, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": " no it was n't black monday \n"}, -{"dataset_name": "SQuAD1", "split": "train", "NUM_LINES": 87599, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": ["Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.", "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?", ["Saint Bernadette Soubirous"], [515]]}, -{"dataset_name": "SQuAD1", "split": "dev", "NUM_LINES": 10570, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": ["Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50.", "Which NFL team represented the AFC at Super Bowl 50?", ["Denver Broncos", "Denver Broncos", "Denver Broncos"], [177, 177, 177]]}, -{"dataset_name": "SQuAD2", "split": "train", "NUM_LINES": 130319, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": ["Beyonc\u00e9 Giselle Knowles-Carter (/bi\u02d0\u02c8j\u0252nse\u026a/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyonc\u00e9's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles \"Crazy in Love\" and \"Baby Boy\".", "When did Beyonce start becoming popular?", ["in the late 1990s"], [269]]}, -{"dataset_name": "SQuAD2", "split": "dev", "NUM_LINES": 11873, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": ["The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.", "In what country is Normandy located?", ["France", "France", "France", "France"], [159, 159, 159, 159]]}, -{"dataset_name": "EnWik9", "split": "train", "NUM_LINES": 13147026, "MD5": "3e773f8a1577fda2e27f871ca17f31fd", "URL": "http://mattmahoney.net/dc/enwik9.zip", "first_line": "\n"}] +{ + "meta_data": [ + { + "dataset_name": "IMDB", + "split": "train", + "NUM_LINES": 25000, + "MD5": "7c2ac02c03563afcf9b574c7e56c153a", + "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", + "first_line": [ + "neg", + "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.

The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.

What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.

I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot." + ] + }, + { + "dataset_name": "IMDB", + "split": "test", + "NUM_LINES": 25000, + "MD5": "7c2ac02c03563afcf9b574c7e56c153a", + "URL": "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", + "first_line": [ + "neg", + "I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. It's not. It's clich\u00e9d and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It's really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it's rubbish as they have to always say \"Gene Roddenberry's Earth...\" otherwise people would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again." + ] + }, + { + "dataset_name": "AG_NEWS", + "split": "train", + "NUM_LINES": 120000, + "MD5": "b1a00f826fdfbd249f79597b59e1dc12", + "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv", + "first_line": [ + 3, + "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green again." + ] + }, + { + "dataset_name": "AG_NEWS", + "split": "test", + "NUM_LINES": 7600, + "MD5": "d52ea96a97a2d943681189a97654912d", + "URL": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv", + "first_line": [ + 3, + "Fears for T N pension after talks Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul." + ] + }, + { + "dataset_name": "SogouNews", + "split": "train", + "NUM_LINES": 450000, + "MD5": "0c1700ba70b73f964dd8de569d3fd03e", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", + "first_line": [ + 4, + "2008 di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n me3i nv3 mo2 te4 2008di4 qi1 jie4 qi1ng da3o guo2 ji4 che1 zha3n yu2 15 ri4 za4i qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n she4ng da4 ka1i mu4 . be3n ci4 che1 zha3n jia1ng chi2 xu4 da4o be3n yue4 19 ri4 . ji1n nia2n qi1ng da3o guo2 ji4 che1 zha3n shi4 li4 nia2n da3o che2ng che1 zha3n gui1 mo2 zui4 da4 di2 yi1 ci4 , shi3 yo4ng lia3o qi1ng da3o guo2 ji4 hui4 zha3n zho1ng xi1n di2 qua2n bu4 shi4 ne4i wa4i zha3n gua3n . yi3 xia4 we2i xia4n cha3ng mo2 te4 tu2 pia4n ." + ] + }, + { + "dataset_name": "SogouNews", + "split": "test", + "NUM_LINES": 60000, + "MD5": "0c1700ba70b73f964dd8de569d3fd03e", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUkVqNEszd0pHaFE", + "first_line": [ + 1, + " ti3 ca1o shi4 jie4 be1i : che2ng fe1i na2 pi2ng he2ng mu4 zi4 yo2u ca1o ji1n pa2i su4 du4 : ( shuo1 mi2ng : dia3n ji1 zi4 do4ng bo1 fa4ng )\\n shuo1 mi2ng : dia3n ji1 ga1i a4n niu3 , xua3n ze2 yi1 lu4n ta2n ji2 ke3 " + ] + }, + { + "dataset_name": "DBpedia", + "split": "train", + "NUM_LINES": 560000, + "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", + "first_line": [ + 1, + "E. D. Abbott Ltd Abbott of Farnham E D Abbott Limited was a British coachbuilding business based in Farnham Surrey trading under that name from 1929. A major part of their output was under sub-contract to motor vehicle manufacturers. Their business closed in 1972." + ] + }, + { + "dataset_name": "DBpedia", + "split": "test", + "NUM_LINES": 70000, + "MD5": "dca7b1ae12b1091090db52aa7ec5ca64", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k", + "first_line": [ + 1, + "TY KU TY KU /ta\u026aku\u02d0/ is an American alcoholic beverage company that specializes in sake and other spirits. The privately-held company was founded in 2004 and is headquartered in New York City New York. While based in New York TY KU's beverages are made in Japan through a joint venture with two sake breweries. Since 2011 TY KU's growth has extended its products into all 50 states." + ] + }, + { + "dataset_name": "YelpReviewPolarity", + "split": "train", + "NUM_LINES": 560000, + "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", + "first_line": [ + 1, + "Unfortunately, the frustration of being Dr. Goldberg's patient is a repeat of the experience I've had with so many other doctors in NYC -- good doctor, terrible staff. It seems that his staff simply never answers the phone. It usually takes 2 hours of repeated calling to get an answer. Who has time for that or wants to deal with it? I have run into this problem with many other doctors and I just don't get it. You have office workers, you have patients with medical needs, why isn't anyone answering the phone? It's incomprehensible and not work the aggravation. It's with regret that I feel that I have to give Dr. Goldberg 2 stars." + ] + }, + { + "dataset_name": "YelpReviewPolarity", + "split": "test", + "NUM_LINES": 38000, + "MD5": "620c8ae4bd5a150b730f1ba9a7c6a4d3", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg", + "first_line": [ + 2, + "Contrary to other reviews, I have zero complaints about the service or the prices. I have been getting tire service here for the past 5 years now, and compared to my experience with places like Pep Boys, these guys are experienced and know what they're doing. \\nAlso, this is one place that I do not feel like I am being taken advantage of, just because of my gender. Other auto mechanics have been notorious for capitalizing on my ignorance of cars, and have sucked my bank account dry. But here, my service and road coverage has all been well explained - and let up to me to decide. \\nAnd they just renovated the waiting room. It looks a lot better than it did in previous years." + ] + }, + { + "dataset_name": "YelpReviewFull", + "split": "train", + "NUM_LINES": 650000, + "MD5": "f7ddfafed1033f68ec72b9267863af6c", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", + "first_line": [ + 5, + "dr. goldberg offers everything i look for in a general practitioner. he's nice and easy to talk to without being patronizing; he's always on time in seeing his patients; he's affiliated with a top-notch hospital (nyu) which my parents have explained to me is very important in case something happens and you need surgery; and you can get referrals to see specialists without having to see him first. really, what more do you need? i'm sitting here trying to think of any complaints i have about him, but i'm really drawing a blank." + ] + }, + { + "dataset_name": "YelpReviewFull", + "split": "test", + "NUM_LINES": 50000, + "MD5": "f7ddfafed1033f68ec72b9267863af6c", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0", + "first_line": [ + 1, + "I got 'new' tires from them and within two weeks got a flat. I took my car to a local mechanic to see if i could get the hole patched, but they said the reason I had a flat was because the previous patch had blown - WAIT, WHAT? I just got the tire and never needed to have it patched? This was supposed to be a new tire. \\nI took the tire over to Flynn's and they told me that someone punctured my tire, then tried to patch it. So there are resentful tire slashers? I find that very unlikely. After arguing with the guy and telling him that his logic was far fetched he said he'd give me a new tire \\\"this time\\\". \\nI will never go back to Flynn's b/c of the way this guy treated me and the simple fact that they gave me a used tire!" + ] + }, + { + "dataset_name": "YahooAnswers", + "split": "train", + "NUM_LINES": 1400000, + "MD5": "f3f9899b997a42beb24157e62e3eea8d", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", + "first_line": [ + 5, + "why doesn't an optical mouse work on a glass table? or even on some surfaces? Optical mice use an LED and a camera to rapidly capture images of the surface beneath the mouse. The infomation from the camera is analyzed by a DSP (Digital Signal Processor) and used to detect imperfections in the underlying surface and determine motion. Some materials, such as glass, mirrors or other very shiny, uniform surfaces interfere with the ability of the DSP to accurately analyze the surface beneath the mouse. \\nSince glass is transparent and very uniform, the mouse is unable to pick up enough imperfections in the underlying surface to determine motion. Mirrored surfaces are also a problem, since they constantly reflect back the same image, causing the DSP not to recognize motion properly. When the system is unable to see surface changes associated with movement, the mouse will not work properly." + ] + }, + { + "dataset_name": "YahooAnswers", + "split": "test", + "NUM_LINES": 60000, + "MD5": "f3f9899b997a42beb24157e62e3eea8d", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU", + "first_line": [ + 9, + "What makes friendship click? How does the spark keep going? good communication is what does it. Can you move beyond small talk and say what's really on your mind. If you start doing this, my expereince is that potentially good friends will respond or shun you. Then you know who the really good friends are." + ] + }, + { + "dataset_name": "AmazonReviewPolarity", + "split": "train", + "NUM_LINES": 3600000, + "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", + "first_line": [ + 2, + "Stuning even for the non-gamer This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen! ^_^" + ] + }, + { + "dataset_name": "AmazonReviewPolarity", + "split": "test", + "NUM_LINES": 400000, + "MD5": "fe39f8b653cada45afd5792e0f0e8f9b", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM", + "first_line": [ + 2, + "Great CD My lovely Pat has one of the GREAT voices of her generation. I have listened to this CD for YEARS and I still LOVE IT. When I'm in a good mood it makes me feel better. A bad mood just evaporates like sugar in the rain. This CD just oozes LIFE. Vocals are jusat STUUNNING and lyrics just kill. One of life's hidden gems. This is a desert isle CD in my book. Why she never made it big is just beyond me. Everytime I play this, no matter black, white, young, old, male, female EVERYBODY says one thing \"Who was that singing ?\"" + ] + }, + { + "dataset_name": "AmazonReviewFull", + "split": "train", + "NUM_LINES": 3000000, + "MD5": "57d28bd5d930e772930baddf36641c7c", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", + "first_line": [ + 3, + "more like funchuck Gave this to my dad for a gag gift after directing \"Nunsense,\" he got a reall kick out of it!" + ] + }, + { + "dataset_name": "AmazonReviewFull", + "split": "test", + "NUM_LINES": 650000, + "MD5": "57d28bd5d930e772930baddf36641c7c", + "URL": "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA", + "first_line": [ + 1, + "mens ultrasheer This model may be ok for sedentary types, but I'm active and get around alot in my job - consistently found these stockings rolled up down by my ankles! Not Good!! Solution: go with the standard compression stocking, 20-30, stock #114622. Excellent support, stays up and gives me what I need. Both pair of these also tore as I struggled to pull them up all the time. Good riddance/bad investment!" + ] + }, + { + "dataset_name": "UDPOS", + "split": "train", + "NUM_LINES": 12543, + "MD5": "bdcac7c52d934656bae1699541424545", + "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", + "first_line": [ + [ + "Al", + "-", + "Zaman", + ":", + "American", + "forces", + "killed", + "Shaikh", + "Abdullah", + "al", + "-", + "Ani", + ",", + "the", + "preacher", + "at", + "the", + "mosque", + "in", + "the", + "town", + "of", + "Qaim", + ",", + "near", + "the", + "Syrian", + "border", + "." + ], + [ + "PROPN", + "PUNCT", + "PROPN", + "PUNCT", + "ADJ", + "NOUN", + "VERB", + "PROPN", + "PROPN", + "PROPN", + "PUNCT", + "PROPN", + "PUNCT", + "DET", + "NOUN", + "ADP", + "DET", + "NOUN", + "ADP", + "DET", + "NOUN", + "ADP", + "PROPN", + "PUNCT", + "ADP", + "DET", + "ADJ", + "NOUN", + "PUNCT" + ], + [ + "NNP", + "HYPH", + "NNP", + ":", + "JJ", + "NNS", + "VBD", + "NNP", + "NNP", + "NNP", + "HYPH", + "NNP", + ",", + "DT", + "NN", + "IN", + "DT", + "NN", + "IN", + "DT", + "NN", + "IN", + "NNP", + ",", + "IN", + "DT", + "JJ", + "NN", + "." + ] + ] + }, + { + "dataset_name": "UDPOS", + "split": "valid", + "NUM_LINES": 2002, + "MD5": "bdcac7c52d934656bae1699541424545", + "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", + "first_line": [ + [ + "From", + "the", + "AP", + "comes", + "this", + "story", + ":" + ], + [ + "ADP", + "DET", + "PROPN", + "VERB", + "DET", + "NOUN", + "PUNCT" + ], + [ + "IN", + "DT", + "NNP", + "VBZ", + "DT", + "NN", + ":" + ] + ] + }, + { + "dataset_name": "UDPOS", + "split": "test", + "NUM_LINES": 2077, + "MD5": "bdcac7c52d934656bae1699541424545", + "URL": "https://bitbucket.org/sivareddyg/public/downloads/en-ud-v2.zip", + "first_line": [ + [ + "What", + "if", + "Google", + "Morphed", + "Into", + "GoogleOS", + "?" + ], + [ + "PRON", + "SCONJ", + "PROPN", + "VERB", + "ADP", + "PROPN", + "PUNCT" + ], + [ + "WP", + "IN", + "NNP", + "VBD", + "IN", + "NNP", + "." + ] + ] + }, + { + "dataset_name": "CoNLL2000Chunking", + "split": "train", + "NUM_LINES": 8936, + "MD5": { + "train": "6969c2903a1f19a83569db643e43dcc8", + "test": "a916e1c2d83eb3004b38fc6fcd628939" + }, + "URL": { + "train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", + "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz" + }, + "first_line": [ + [ + "Confidence", + "in", + "the", + "pound", + "is", + "widely", + "expected", + "to", + "take", + "another", + "sharp", + "dive", + "if", + "trade", + "figures", + "for", + "September", + ",", + "due", + "for", + "release", + "tomorrow", + ",", + "fail", + "to", + "show", + "a", + "substantial", + "improvement", + "from", + "July", + "and", + "August", + "'s", + "near-record", + "deficits", + "." + ], + [ + "NN", + "IN", + "DT", + "NN", + "VBZ", + "RB", + "VBN", + "TO", + "VB", + "DT", + "JJ", + "NN", + "IN", + "NN", + "NNS", + "IN", + "NNP", + ",", + "JJ", + "IN", + "NN", + "NN", + ",", + "VB", + "TO", + "VB", + "DT", + "JJ", + "NN", + "IN", + "NNP", + "CC", + "NNP", + "POS", + "JJ", + "NNS", + "." + ], + [ + "B-NP", + "B-PP", + "B-NP", + "I-NP", + "B-VP", + "I-VP", + "I-VP", + "I-VP", + "I-VP", + "B-NP", + "I-NP", + "I-NP", + "B-SBAR", + "B-NP", + "I-NP", + "B-PP", + "B-NP", + "O", + "B-ADJP", + "B-PP", + "B-NP", + "B-NP", + "O", + "B-VP", + "I-VP", + "I-VP", + "B-NP", + "I-NP", + "I-NP", + "B-PP", + "B-NP", + "I-NP", + "I-NP", + "B-NP", + "I-NP", + "I-NP", + "O" + ] + ] + }, + { + "dataset_name": "CoNLL2000Chunking", + "split": "test", + "NUM_LINES": 2012, + "MD5": { + "train": "6969c2903a1f19a83569db643e43dcc8", + "test": "a916e1c2d83eb3004b38fc6fcd628939" + }, + "URL": { + "train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz", + "test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz" + }, + "first_line": [ + [ + "Rockwell", + "International", + "Corp.", + "'s", + "Tulsa", + "unit", + "said", + "it", + "signed", + "a", + "tentative", + "agreement", + "extending", + "its", + "contract", + "with", + "Boeing", + "Co.", + "to", + "provide", + "structural", + "parts", + "for", + "Boeing", + "'s", + "747", + "jetliners", + "." + ], + [ + "NNP", + "NNP", + "NNP", + "POS", + "NNP", + "NN", + "VBD", + "PRP", + "VBD", + "DT", + "JJ", + "NN", + "VBG", + "PRP$", + "NN", + "IN", + "NNP", + "NNP", + "TO", + "VB", + "JJ", + "NNS", + "IN", + "NNP", + "POS", + "CD", + "NNS", + "." + ], + [ + "B-NP", + "I-NP", + "I-NP", + "B-NP", + "I-NP", + "I-NP", + "B-VP", + "B-NP", + "B-VP", + "B-NP", + "I-NP", + "I-NP", + "B-VP", + "B-NP", + "I-NP", + "B-PP", + "B-NP", + "I-NP", + "B-VP", + "I-VP", + "B-NP", + "I-NP", + "B-PP", + "B-NP", + "B-NP", + "I-NP", + "I-NP", + "O" + ] + ] + }, + { + "dataset_name": "Multi30k", + "split": "train", + "NUM_LINES": 29000, + "MD5": [ + "d9a5fc268917725a2b0efce3a0cc8607", + "81ff90b99829c0cd4b1b587d394afd39", + "0065d13af80720a55ca8153d126e6627", + "6cb767741dcad3931f966fefbc05203f", + "62f36422bfab90fb42a560546b704009", + "540da4566bb6dd35fdbc720218b742b7", + "613eb4a3f0c2b13f0871ced946851b0e", + "d848fe0ae8b9447209fb49c5c31cb3d2", + "abc13b4042f4fef1cdff6de3b6c53b71", + "cbf5bfc2147706f228d288e1b18bf4af", + "bdfe4222f4692ccaa1e3389460f0890e", + "0e1ee2b4145795bd180b193424db204b", + "1cff688d1aadef7fdb22e9ad27d6fd2c", + "3e10289959d0059952511c31df3c7550" + ], + "URL": [ + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.cs.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/train.fr.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.1.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.2.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.3.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.4.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/train.5.en.gz" + ], + "first_line": [ + "Zwei junge wei\u00dfe M\u00e4nner sind im Freien in der N\u00e4he vieler B\u00fcsche.\n", + "Two young, White males are outside near many bushes.\n" + ] + }, + { + "dataset_name": "Multi30k", + "split": "valid", + "NUM_LINES": 1014, + "MD5": [ + "83cdc082f646b769095615384cf5c0ca", + "6e0e229eb049e3fc99a1ef02fb2d5f91", + "2b69aa9253948ac9f67e94917272dd40", + "93fc564584b7e5ba410c761ea5a1c682", + "b26486ede1d4436d5acf6e38c65bb44d", + "16165248083beacebfe18866d5f4f0ae", + "7180780822d4b600eb81c1ccf171c230", + "8edb43c90cae66ec762748a968089b99", + "873a377a348713d3ab84db1fb57cdede", + "df57faf5f00d434d2559c021ef55f1aa", + "9077a5127480cc799116384de501bd70", + "c1f697c3b6dfb7305349db34e26b45fc", + "acb5ea26a577ceccfae6337181c31716", + "680816e0938fea5cf5331444bc09a4cf" + ], + "URL": [ + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.cs.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/val.fr.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.1.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.2.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.3.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.4.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/val.5.en.gz" + ], + "first_line": [ + "Eine Gruppe von M\u00e4nnern l\u00e4dt Baumwolle auf einen Lastwagen\n", + "A group of men are loading cotton onto a truck\n" + ] + }, + { + "dataset_name": "Multi30k", + "split": "test", + "NUM_LINES": 1000, + "MD5": [ + "3104872229daa1bef3b401d44dd2220b", + "efd67d314d98489b716b145475101932", + "6a8d5c87f6ae19e3d35681aa6fd16571", + "e8cd6ec2bc8a11fc846fa48a46e3d0bb", + "ff2c0fcb4893a13bd73414306bc250ae", + "005396bac545d880abe6f00bbb7dbbb4", + "a7b684e0edbef1d4a23660c8e8e743fd", + "a152878809942757a55ce087073486b8", + "08dc7cd4a662f31718412de95ca9bfe3", + "cb09af7d2b501f9112f2d6a59fa1360d", + "4995d10954a804d3cdfd907b9fd093e8", + "ac0c72653c140dd96707212a1baa4278", + "6dfb42cae4e4fd9a3c40e62ff5398a55", + "ece8cec6b87bf00dd12607f3062dae4c", + "9a7e7b2dcc33135a32cd621c3b37d2d8", + "7d5ef0f069ee2d74dc2fdc6b46cd47fa", + "eec05227daba4bb8f3f8f25b1cb335f4", + "9318fa08c0c0b96114eadb10eb2fc633", + "088ec0765fa213a0eb937a62adfd4996", + "5f7c8d0be0ac739856b47d32a9434998", + "713ed720636622a54546d5f14f88b00f" + ], + "URL": [ + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.cs.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2018_flickr.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2016_flickr.fr.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_flickr.fr.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/test_2017_mscoco.fr.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.de.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.1.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.2.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.3.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.4.en.gz", + "https://raw.githubusercontent.com/multi30k/dataset/master/data/task2/raw/test_2016.5.en.gz" + ], + "first_line": [ + "Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.\n", + "A man in an orange hat starring at something.\n" + ] + }, + { + "dataset_name": "IWSLT2016", + "split": "train", + "NUM_LINES": 196884, + "MD5": "c393ed3fc2a1b0f004b3331043f615ae", + "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", + "first_line": [ + "David Gallo: Das ist Bill Lange. Ich bin Dave Gallo.\n", + "David Gallo: This is Bill Lange. I'm Dave Gallo.\n" + ] + }, + { + "dataset_name": "IWSLT2016", + "split": "valid", + "NUM_LINES": 993, + "MD5": "c393ed3fc2a1b0f004b3331043f615ae", + "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", + "first_line": [ + "Als ich 11 Jahre alt war, wurde ich eines Morgens von den Kl\u00e4ngen heller Freude geweckt.\n", + "When I was 11, I remember waking up one morning to the sound of joy in my house.\n" + ] + }, + { + "dataset_name": "IWSLT2016", + "split": "test", + "NUM_LINES": 1305, + "MD5": "c393ed3fc2a1b0f004b3331043f615ae", + "URL": "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8", + "first_line": [ + "Als ich in meinen 20ern war, hatte ich meine erste Psychotherapie-Patientin.\n", + "When I was in my 20s, I saw my very first psychotherapy client.\n" + ] + }, + { + "dataset_name": "IWSLT2017", + "split": "train", + "NUM_LINES": 206112, + "MD5": "aca701032b1c4411afc4d9fa367796ba", + "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", + "first_line": [ + "Vielen Dank, Chris.\n", + "Thank you so much, Chris.\n" + ] + }, + { + "dataset_name": "IWSLT2017", + "split": "valid", + "NUM_LINES": 888, + "MD5": "aca701032b1c4411afc4d9fa367796ba", + "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", + "first_line": [ + "Letztes Jahr habe ich diese beiden Folien gezeigt, um zu veranschaulichen, dass die arktische Eiskappe, die f\u00fcr ann\u00e4hernd drei Millionen Jahre die Gr\u00f6sse der unteren 48 Staaten hatte, um 40 Prozent geschrumpft ist.\n", + "Last year I showed these two slides so that demonstrate that the arctic ice cap, which for most of the last three million years has been the size of the lower 48 states, has shrunk by 40 percent.\n" + ] + }, + { + "dataset_name": "IWSLT2017", + "split": "test", + "NUM_LINES": 1568, + "MD5": "aca701032b1c4411afc4d9fa367796ba", + "URL": "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp", + "first_line": [ + "Vor einigen Jahren, hier bei TED, stellte Peter Skillman einen Design-Wettbewerb namens \"Die Marshmallow-Herausforderung\" vor.\n", + "Several years ago here at TED, Peter Skillman introduced a design challenge called the marshmallow challenge.\n" + ] + }, + { + "dataset_name": "WMT14", + "split": "train", + "NUM_LINES": 4500966, + "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", + "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", + "first_line": [ + "Wiederaufnahme der Sitzungsperiode\n", + "Res@@ um@@ ption of the session\n" + ] + }, + { + "dataset_name": "WMT14", + "split": "valid", + "NUM_LINES": 3000, + "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", + "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", + "first_line": [ + "Eine repub@@ li@@ kanische Strategie , um der Wieder@@ wahl von Obama entgegen@@ zu@@ treten\n", + "A Republic@@ an strategy to counter the re-@@ election of Obama\n" + ] + }, + { + "dataset_name": "WMT14", + "split": "test", + "NUM_LINES": 3003, + "MD5": "874ab6bbfe9c21ec987ed1b9347f95ec", + "URL": "https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8", + "first_line": [ + "Gut@@ ach : Noch mehr Sicherheit f\u00fcr Fu\u00dfg\u00e4n@@ ger\n", + "Gut@@ ach : Incre@@ ased safety for pedestri@@ ans\n" + ] + }, + { + "dataset_name": "WikiText2", + "split": "train", + "NUM_LINES": 36718, + "MD5": "542ccefacc6c27f945fb54453812b3cd", + "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", + "first_line": " \n" + }, + { + "dataset_name": "WikiText2", + "split": "valid", + "NUM_LINES": 3760, + "MD5": "542ccefacc6c27f945fb54453812b3cd", + "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", + "first_line": " \n" + }, + { + "dataset_name": "WikiText2", + "split": "test", + "NUM_LINES": 4358, + "MD5": "542ccefacc6c27f945fb54453812b3cd", + "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", + "first_line": " \n" + }, + { + "dataset_name": "WikiText103", + "split": "train", + "NUM_LINES": 1801350, + "MD5": "9ddaacaf6af0710eda8c456decff7832", + "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", + "first_line": " \n" + }, + { + "dataset_name": "WikiText103", + "split": "valid", + "NUM_LINES": 3760, + "MD5": "9ddaacaf6af0710eda8c456decff7832", + "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", + "first_line": " \n" + }, + { + "dataset_name": "WikiText103", + "split": "test", + "NUM_LINES": 4358, + "MD5": "9ddaacaf6af0710eda8c456decff7832", + "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", + "first_line": " \n" + }, + { + "dataset_name": "PennTreebank", + "split": "train", + "NUM_LINES": 42068, + "MD5": { + "train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", + "valid": "aa0affc06ff7c36e977d7cd49e3839bf", + "test": "8b80168b89c18661a38ef683c0dc3721" + }, + "URL": { + "train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", + "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", + "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt" + }, + "first_line": " aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memotec mlx nahb punts rake regatta rubens sim snack-food ssangyong swapo wachter \n" + }, + { + "dataset_name": "PennTreebank", + "split": "valid", + "NUM_LINES": 3370, + "MD5": { + "train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", + "valid": "aa0affc06ff7c36e977d7cd49e3839bf", + "test": "8b80168b89c18661a38ef683c0dc3721" + }, + "URL": { + "train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", + "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", + "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt" + }, + "first_line": " consumers may want to move their telephones a little closer to the tv set \n" + }, + { + "dataset_name": "PennTreebank", + "split": "test", + "NUM_LINES": 3761, + "MD5": { + "train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", + "valid": "aa0affc06ff7c36e977d7cd49e3839bf", + "test": "8b80168b89c18661a38ef683c0dc3721" + }, + "URL": { + "train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", + "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", + "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt" + }, + "first_line": " no it was n't black monday \n" + }, + { + "dataset_name": "SQuAD1", + "split": "train", + "NUM_LINES": 87599, + "MD5": { + "train": "981b29407e0affa3b1b156f72073b945", + "dev": "3e85deb501d4e538b6bc56f786231552" + }, + "URL": { + "train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", + "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json" + }, + "first_line": [ + "Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.", + "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?", + [ + "Saint Bernadette Soubirous" + ], + [ + 515 + ] + ] + }, + { + "dataset_name": "SQuAD1", + "split": "dev", + "NUM_LINES": 10570, + "MD5": { + "train": "981b29407e0affa3b1b156f72073b945", + "dev": "3e85deb501d4e538b6bc56f786231552" + }, + "URL": { + "train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", + "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json" + }, + "first_line": [ + "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the \"golden anniversary\" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as \"Super Bowl L\"), so that the logo could prominently feature the Arabic numerals 50.", + "Which NFL team represented the AFC at Super Bowl 50?", + [ + "Denver Broncos", + "Denver Broncos", + "Denver Broncos" + ], + [ + 177, + 177, + 177 + ] + ] + }, + { + "dataset_name": "SQuAD2", + "split": "train", + "NUM_LINES": 130319, + "MD5": { + "train": "62108c273c268d70893182d5cf8df740", + "dev": "246adae8b7002f8679c027697b0b7cf8" + }, + "URL": { + "train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", + "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json" + }, + "first_line": [ + "Beyonc\u00e9 Giselle Knowles-Carter (/bi\u02d0\u02c8j\u0252nse\u026a/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyonc\u00e9's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles \"Crazy in Love\" and \"Baby Boy\".", + "When did Beyonce start becoming popular?", + [ + "in the late 1990s" + ], + [ + 269 + ] + ] + }, + { + "dataset_name": "SQuAD2", + "split": "dev", + "NUM_LINES": 11873, + "MD5": { + "train": "62108c273c268d70893182d5cf8df740", + "dev": "246adae8b7002f8679c027697b0b7cf8" + }, + "URL": { + "train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", + "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json" + }, + "first_line": [ + "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.", + "In what country is Normandy located?", + [ + "France", + "France", + "France", + "France" + ], + [ + 159, + 159, + 159, + 159 + ] + ] + }, + { + "dataset_name": "EnWik9", + "split": "train", + "NUM_LINES": 13147026, + "MD5": "3e773f8a1577fda2e27f871ca17f31fd", + "URL": "http://mattmahoney.net/dc/enwik9.zip", + "first_line": "\n" + } + ] }