Skip to content

Commit

Permalink
more v5 json
Browse files Browse the repository at this point in the history
  • Loading branch information
parkchamchi committed Oct 30, 2024
1 parent 6ca78e7 commit 9f54293
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 1 deletion.
1 change: 1 addition & 0 deletions samples/v5/Die Leiden des jungen Werther.corpus.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions samples/v5/Ethica - Pars I.corpus.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions samples/v5/Le papillon et le fleur.corpus.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version": 5, "meatadata": {"title": "Le papillon et le fleur", "author": "Victor Hugo", "annotation-info": "Machine-glossed with fine-tuned gpt-4o-mini (https://github.com/parkchamchi/GlossySnake/blob/master/src/tools/data/gs_240918.jsonl)", "original-language": "fr", "gloss-language": "en", "note": "Initially generated as a v4 file."}, "paragraphs": [{"tokens": [{"txt": "La", "gloss": "The", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "pauvre", "gloss": "poor", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "fleur", "gloss": "flower", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "disait", "gloss": "said", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "au", "gloss": "to-the", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "papillon", "gloss": "butterfly", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "c\u00e9leste:", "gloss": "heavenly:", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Ne", "gloss": "Not", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "fuis", "gloss": "flee", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "pas!", "gloss": "not!", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Vois", "gloss": "See", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "comme", "gloss": "how", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nos", "gloss": "our", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "destins", "gloss": "destinies", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "sont", "gloss": "are", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "diff\u00e9rents.", "gloss": "different.", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "Je", "gloss": "I", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "reste,", "gloss": "remain,", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Tu", "gloss": "Thou", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "t\u2019en", "gloss": "thyself", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "vas!", "gloss": "goest!", "is_delimiter": false}, {"txt": "\n\n", "gloss": null, "is_delimiter": true}, {"txt": "Pourtant", "gloss": "Yet", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nous", "gloss": "we", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nous", "gloss": "we", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "aimons,", "gloss": "love,", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nous", "gloss": "we", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "vivons", "gloss": "live", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "sans", "gloss": "without", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "les", "gloss": "the", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "hommes", "gloss": "men", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Et", "gloss": "And", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "loin", "gloss": "far", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "d\u2019eux,", "gloss": "from-them,", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Et", "gloss": "And", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nous", "gloss": "we", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nous", "gloss": "ourselves", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "ressemblons,", "gloss": "resemble,", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "et", "gloss": "and", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "l\u2019on", "gloss": "one", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "dit", "gloss": "says", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "que", "gloss": "that", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nous", "gloss": "we", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "sommes", "gloss": "are", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Fleurs", "gloss": "Flowers", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "tous", "gloss": "all", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "deux!", "gloss": "two!", "is_delimiter": false}, {"txt": "\n\n", "gloss": null, "is_delimiter": true}]}, {"tokens": [{"txt": "Mais,", "gloss": "But,", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "h\u00e9las!", "gloss": "alas!", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "l\u2019air", "gloss": "the-air", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "t\u2019emporte", "gloss": "carries-thee-off", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "et", "gloss": "and", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "la", "gloss": "the", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "terre", "gloss": "earth", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "m\u2019encha\u00eene.", "gloss": "enchains-me.", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Sort", "gloss": "Fate", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "cruel!", "gloss": "cruel!", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Je", "gloss": "I", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "voudrais", "gloss": "should-like", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "embaumer", "gloss": "to-embalm", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "ton", "gloss": "thy", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "vol", "gloss": "flight", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "de", "gloss": "of", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "mon", "gloss": "my", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "haleine", "gloss": "breath", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Dans", "gloss": "In", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "le", "gloss": "the", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "ciel!", "gloss": "sky!", "is_delimiter": false}, {"txt": "\n\n\n", "gloss": null, "is_delimiter": true}, {"txt": "Mais", "gloss": "But", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "non,", "gloss": "no,", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "tu", "gloss": "thou", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "vas", "gloss": "goest", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "trop", "gloss": "too", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "loin!", "gloss": "far!", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "\u2013", "gloss": "-", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "Parmi", "gloss": "Among", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "des", "gloss": "the", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "fleurs", "gloss": "flowers", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "sans", "gloss": "without", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "nombre", "gloss": "number", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Vous", "gloss": "You", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "fuyez,", "gloss": "flee,", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Et", "gloss": "And", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "moi", "gloss": "me", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "je", "gloss": "I", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "reste", "gloss": "remain", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "seule", "gloss": "alone", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "\u00e0", "gloss": "to", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "voir", "gloss": "see", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "tourner", "gloss": "turn", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "mon", "gloss": "my", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "ombre", "gloss": "shade", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "\u00c0", "gloss": "At", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "mes", "gloss": "my", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "pieds.", "gloss": "feet.", "is_delimiter": false}, {"txt": "\n\n", "gloss": null, "is_delimiter": true}]}, {"tokens": [{"txt": "Tu", "gloss": "Thou", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "fuis,", "gloss": "fleest,", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "puis", "gloss": "then", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "tu", "gloss": "thou", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "reviens;", "gloss": "returnst;", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "puis", "gloss": "then", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "tu", "gloss": "thou", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "t\u2019en", "gloss": "thence", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "vas", "gloss": "goest", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "encore", "gloss": "yet", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Luire", "gloss": "Shining", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "ailleurs.", "gloss": "elsewhere.", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Aussi", "gloss": "Also", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "me", "gloss": "me", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "trouves-tu", "gloss": "findest", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "toujours", "gloss": "always", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "\u00e0", "gloss": "at", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "chaque", "gloss": "each", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "aurore", "gloss": "aurora", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Toute", "gloss": "All", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "en", "gloss": "in", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "pleurs!", "gloss": "tears!", "is_delimiter": false}, {"txt": "\n\n", "gloss": null, "is_delimiter": true}, {"txt": "Oh!", "gloss": "Oh!", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "pour", "gloss": "for", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "que", "gloss": "that", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "notre", "gloss": "our", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "amour", "gloss": "love", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "coule", "gloss": "flow", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "des", "gloss": "some", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "jours", "gloss": "days", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "fid\u00e8les,", "gloss": "faithful,", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "\u00d4", "gloss": "O", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "mon", "gloss": "my", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "roi,", "gloss": "king,", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Prends", "gloss": "Take", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "comme", "gloss": "like", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "moi", "gloss": "me", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "racine,", "gloss": "root,", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "ou", "gloss": "or", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "donne-moi", "gloss": "give-me", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "des", "gloss": "some", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "ailes", "gloss": "wings", "is_delimiter": false}, {"txt": "\n", "gloss": null, "is_delimiter": true}, {"txt": "Comme", "gloss": "Like", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "\u00e0", "gloss": "at", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}, {"txt": "toi!", "gloss": "thee!", "is_delimiter": false}, {"txt": " ", "gloss": null, "is_delimiter": true}]}]}
1 change: 1 addition & 0 deletions samples/v5/Winterreise.corpus.json

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion samples/v5/index.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
"filenames": [
"Tristesse d'Olympio.corpus.json",
"Winterreise.corpus.json",
"Le papillon et le fleur.corpus.json",
"Die Leiden des jungen Werther.corpus.json",
"Ethica - Pars I.corpus.json",
"Der Prozess.corpus.json"
]
}

0 comments on commit 9f54293

Please sign in to comment.