Skip to content

Commit

Permalink
Settings: Add Settings - Measures - Readability - Flesch Reading Ease…
Browse files Browse the repository at this point in the history
…; Work Area: Remove Profiler - Fernández Huerta's Readability Score / Szigriszt's Perspicuity Index
  • Loading branch information
BLKSerene committed Jul 24, 2023
1 parent 54d0e1c commit 4a9fa51
Show file tree
Hide file tree
Showing 11 changed files with 936 additions and 963 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

## [3.3.0](https://github.com/BLKSerene/Wordless/releases/tag/3.3.0) - ??/??/2023
### 🎉 New Features
- Settings: Add Settings - Measures - Readability - Flesch Reading Ease
- Utils: Add khmer-nltk's Khmer sentence tokenizer, word tokenizer, and part-of-speech tagger
- Utils: Add PyThaiNLP's perceptron part-of-speech tagger (Blackboard)
- Utils: Add spaCy's Korean sentence recognizer, word tokenizer, part-of-speech tagger, lemmatizer, and dependency parser
Expand All @@ -35,6 +36,7 @@

### ❌ Removals
- Utils: Remove PyThaiNLP's perceptron part-of-speech tagger (LST20)
- Work Area: Remove Profiler - Fernández Huerta's Readability Score / Szigriszt's Perspicuity Index

### ⏫ Dependency Changes
- Dependencies: Add khmer-nltk
Expand Down
118 changes: 55 additions & 63 deletions doc/doc_eng.md

Large diffs are not rendered by default.

91 changes: 0 additions & 91 deletions doc/measures/readability/fernandez_huertas_readability_score.svg

This file was deleted.

783 changes: 392 additions & 391 deletions doc/measures/readability/re.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
470 changes: 235 additions & 235 deletions doc/measures/readability/wstf.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
256 changes: 136 additions & 120 deletions tests/wl_tests_measures/test_measures_readability.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/wl_tests_work_area/test_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def update_gui(err_msg, texts_stats_files):
count_tokens_lens_syls.append(collections.Counter(len_tokens_syls))
count_tokens_lens_chars.append(collections.Counter(len_tokens_chars))

assert len(readability_statistics) == 24
assert len(readability_statistics) == 22

# Counts
assert count_paras
Expand Down
116 changes: 67 additions & 49 deletions wordless/wl_measures/wl_measures_readability.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,27 +216,6 @@ def devereux_readability_index(main, text):

return grade_placement

# Fernández Huerta's Readability Score
# References:
# Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. Consigna, 214, 29–32.
# Law, Gwillim. (2011, May 27). Error in the Fernandez Huerta readability formula. LINGUIST List. https://linguistlist.org/issues/22/22-2332/
def fernandez_huertas_readability_score(main, text):
    """Compute Fernández Huerta's Readability Score for a Spanish text.

    Parameters:
        main: Application object; ``main.settings_global['syl_tokenizers']``
            is consulted to check that syllable tokenization is available.
        text: Text object exposing ``lang``. It is passed through
            ``get_counts``, whose result is read for ``count_words``,
            ``count_sentences`` and ``count_syls``.

    Returns:
        The score as a float, ``'no_support'`` when the text is not Spanish
        or no syllable tokenizer is registered for it, or ``'text_too_short'``
        when the text has no words or no sentences.
    """
    # Only Spanish texts with an available syllable tokenizer are supported
    if text.lang != 'spa' or text.lang not in main.settings_global['syl_tokenizers']:
        return 'no_support'

    text = get_counts(main, text)

    # Both ratios below would divide by zero on an empty text
    if not (text.count_words and text.count_sentences):
        return 'text_too_short'

    # Corrected formula (Law, 2011): the sentence term is the mean number of
    # words per sentence, not the number of sentences per 100 words
    return (
        206.84
        - 60 * (text.count_syls / text.count_words)
        - 1.02 * (text.count_words / text.count_sentences)
    )

# Flesch-Kincaid Grade Level
# Reference: Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel. Naval Air Station Memphis. https://apps.dtic.mil/sti/pdfs/ADA006655.pdf
def flesch_kincaid_grade_level(main, text):
Expand All @@ -259,8 +238,10 @@ def flesch_kincaid_grade_level(main, text):
# Flesch Reading Ease
# Reference:
# Flesch, R. (1948). A new readability yardstick. Journal of Applied Psychology, 32(3), 221–233. https://doi.org/10.1037/h0057532
# Dutch variant:
# Dutch variant (Douma):
# Douma, W. H. (1960). De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323
# Dutch variant (Brouwer's Leesindex A):
# Brouwer, R. H. M. (1963). Onderzoek naar de leesmoeilijkheid van Nederlands proza. Paedagogische studiën, 40, 454–464. https://objects.library.uu.nl/reader/index.php?obj=1874-205260&lan=en
# French variant:
# Kandel, L., & Moles, A. (1958). Application de l’indice de Flesch à la langue française [Applying Flesch index to French language]. The Journal of Educational Research, 21, 283–287.
# Kopient, A., & Grabar, N. (2020). Rated lexicon for the simplification of medical texts. In B. Gersbeck-Schierholz (ed.), HEALTHINFO 2020: The fifth international conference on informatics and assistive technologies for health-care, medical support and wellbeing (pp. 11–17). IARIA. https://hal.science/hal-03095275/document
Expand All @@ -272,22 +253,78 @@ def flesch_kincaid_grade_level(main, text):
# Garais, E. (2011). Web applications readability. Journal of Information Systems and Operations Management, 5(1), 117–121. http://www.rebe.rau.ro/RePEc/rau/jisomg/SP11/JISOM-SP11-A13.pdf
# Russian variant:
# Oborneva, I. V. (2006). Автоматизированная оценка сложности учебных текстов на основе статистических параметров [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
# Spanish variant (Fernández Huerta):
# Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. Consigna, 214, 29–32.
# Garais, E. (2011). Web applications readability. Journal of Information Systems and Operations Management, 5(1), 117–121. http://www.rebe.rau.ro/RePEc/rau/jisomg/SP11/JISOM-SP11-A13.pdf
# Spanish variant (Szigriszt Pazos):
# Szigriszt Pazos, F. (1993). Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
def flesch_reading_ease(main, text):
    """Compute the Flesch Reading Ease score, using a language-specific variant where one exists.

    Variants are selected by ``text.lang``: Dutch (``nld``), French (``fra``),
    German (any ``deu_*``), Italian (``ita``), Russian (``rus``) and Spanish
    (``spa``). Every other language uses Flesch's original formula. For Dutch
    and Spanish the variant is chosen by the user settings under
    ``main.settings_custom['measures']['readability']['re']``.

    Parameters:
        main: Application object providing ``settings_global`` and
            ``settings_custom``.
        text: Text object exposing ``lang``. It is passed through
            ``get_counts``, whose result is read for ``count_words``,
            ``count_sentences`` and ``count_syls``.

    Returns:
        The score as a float, ``'no_support'`` when no syllable tokenizer is
        registered for the language, or ``'text_too_short'`` when the text has
        no words or no sentences.
    """
    if text.lang not in main.settings_global['syl_tokenizers']:
        return 'no_support'

    text = get_counts(main, text)

    # Both ratios below would divide by zero on an empty text
    if not (text.count_words and text.count_sentences):
        return 'text_too_short'

    syls_per_word = text.count_syls / text.count_words
    words_per_sentence = text.count_words / text.count_sentences

    if text.lang == 'nld':
        # Settings are only read for languages that actually have variants
        variant_nld = main.settings_custom['measures']['readability']['re']['variant_nld']

        if variant_nld == "Brouwer's Leesindex A":
            reading_ease = (
                195
                - (200 / 3) * syls_per_word
                - 2 * words_per_sentence
            )
        else:
            # Douma is the shipped default; also used for unrecognized values
            # so that the result is never left unassigned
            reading_ease = (
                206.84
                - 77 * syls_per_word
                - 0.93 * words_per_sentence
            )
    elif text.lang == 'fra':
        reading_ease = (
            207
            - 73.6 * syls_per_word
            - 1.015 * words_per_sentence
        )
    elif text.lang.startswith('deu_'):
        reading_ease = (
            180
            - 58.5 * syls_per_word
            - words_per_sentence
        )
    elif text.lang == 'ita':
        reading_ease = (
            217
            - 60 * syls_per_word
            - 1.3 * words_per_sentence
        )
    elif text.lang == 'rus':
        reading_ease = (
            206.835
            - 60.1 * syls_per_word
            - 1.3 * words_per_sentence
        )
    elif text.lang == 'spa':
        variant_spa = main.settings_custom['measures']['readability']['re']['variant_spa']

        if variant_spa == 'Szigriszt Pazos':
            reading_ease = (
                206.84
                - 62.3 * syls_per_word
                - words_per_sentence
            )
        else:
            # Fernández Huerta is the shipped default; also used for
            # unrecognized values so that the result is never left unassigned
            reading_ease = (
                206.84
                - 60 * syls_per_word
                - 1.02 * words_per_sentence
            )
    else:
        # Flesch's original formula, expressed in syllables per 100 words
        reading_ease = (
            206.835
            - 0.846 * (syls_per_word * 100)
            - 1.015 * words_per_sentence
        )

    return reading_ease

# Flesch Reading Ease (Simplified)
# Reference: Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. Journal of Applied Psychology, 35(5), 333–337. https://doi.org/10.1037/h0062427
Expand Down Expand Up @@ -689,25 +726,6 @@ def spache_grade_level(main, text):

return grade_level

# Szigriszt's Perspicuity Index
# Reference: Szigriszt Pazos, F. (1993). Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
def szigriszts_perspicuity_index(main, text):
    """Compute Szigriszt's Perspicuity Index for a Spanish text.

    Returns the index as a float, ``'no_support'`` when the text is not
    Spanish or no syllable tokenizer is registered for it, or
    ``'text_too_short'`` when the text has no words or no sentences.
    """
    is_supported = (
        text.lang == 'spa'
        and text.lang in main.settings_global['syl_tokenizers']
    )

    if not is_supported:
        return 'no_support'

    text = get_counts(main, text)

    # Both ratios below would divide by zero on an empty text
    if not text.count_words or not text.count_sentences:
        return 'text_too_short'

    syls_per_word = text.count_syls / text.count_words
    words_per_sentence = text.count_words / text.count_sentences

    return 207 - 62.3 * syls_per_word - words_per_sentence

# Wiener Sachtextformel
# References:
# Bamberger, R., & Vanecek, E. (1984). Lesen – Verstehen – Lernen – Schreiben. Jugend und Volk.
Expand Down
4 changes: 0 additions & 4 deletions wordless/wl_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,6 @@ def __init__(self, parent):
_tr('wl_profiler', 'Coleman-Liau Index'),
_tr('wl_profiler', 'Dale-Chall Readability Score'),
_tr('wl_profiler', 'Devereaux Readability Index'),
_tr('wl_profiler', "Fernández Huerta's Readability Score"),
_tr('wl_profiler', 'Flesch-Kincaid Grade Level'),
_tr('wl_profiler', 'Flesch Reading Ease'),
_tr('wl_profiler', 'Flesch Reading Ease (Simplified)'),
Expand All @@ -379,7 +378,6 @@ def __init__(self, parent):
_tr('wl_profiler', 'Rix'),
_tr('wl_profiler', 'SMOG Grade'),
_tr('wl_profiler', 'Spache Grade Level'),
_tr('wl_profiler', "Szigriszt's Perspicuity Index"),
_tr('wl_profiler', 'Wiener Sachtextformel')
]

Expand Down Expand Up @@ -1178,7 +1176,6 @@ def run(self):
wl_measures_readability.coleman_liau_index(self.main, text),
wl_measures_readability.dale_chall_readability_score(self.main, text),
wl_measures_readability.devereux_readability_index(self.main, text),
wl_measures_readability.fernandez_huertas_readability_score(self.main, text),
wl_measures_readability.flesch_kincaid_grade_level(self.main, text),
wl_measures_readability.flesch_reading_ease(self.main, text),
wl_measures_readability.flesch_reading_ease_simplified(self.main, text),
Expand All @@ -1195,7 +1192,6 @@ def run(self):
wl_measures_readability.rix(self.main, text),
wl_measures_readability.smog_grade(self.main, text),
wl_measures_readability.spache_grade_level(self.main, text),
wl_measures_readability.szigriszts_perspicuity_index(self.main, text),
wl_measures_readability.wiener_sachtextformel(self.main, text)
]
else:
Expand Down
5 changes: 5 additions & 0 deletions wordless/wl_settings/wl_settings_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -1741,6 +1741,11 @@ def init_settings_default(main):
'measures': {
# Settings - Measures - Readability
'readability': {
're': {
'variant_nld': 'Douma',
'variant_spa': 'Fernández Huerta'
},

'wstf': {
'variant': '1'
}
Expand Down
52 changes: 43 additions & 9 deletions wordless/wl_settings/wl_settings_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,38 +31,72 @@ def __init__(self, main):
self.settings_default = self.main.settings_default['measures']['readability']
self.settings_custom = self.main.settings_custom['measures']['readability']

# Flesch Reading Ease
self.group_box_re = QGroupBox(self.tr('Flesch Reading Ease'), self)

self.label_re_variant_nld = QLabel(self.tr('Dutch variant:'), self)
self.combo_box_re_variant_nld = wl_boxes.Wl_Combo_Box(self)
self.label_re_variant_spa = QLabel(self.tr('Spanish variant:'), self)
self.combo_box_re_variant_spa = wl_boxes.Wl_Combo_Box(self)

self.combo_box_re_variant_nld.addItems([
"Brouwer's Leesindex A",
'Douma',
])
self.combo_box_re_variant_spa.addItems([
'Fernández Huerta',
'Szigriszt Pazos'
])

self.group_box_re.setLayout(wl_layouts.Wl_Layout())
self.group_box_re.layout().addWidget(self.label_re_variant_nld, 0, 0)
self.group_box_re.layout().addWidget(self.combo_box_re_variant_nld, 0, 1)
self.group_box_re.layout().addWidget(self.label_re_variant_spa, 1, 0)
self.group_box_re.layout().addWidget(self.combo_box_re_variant_spa, 1, 1)

self.group_box_re.layout().setColumnStretch(2, 1)

# Wiener Sachtextformel
self.group_box_wstf = QGroupBox(self.tr('Wiener Sachtextformel'), self)

self.label_variant = QLabel(self.tr('Variant:'), self)
self.combo_box_variant = wl_boxes.Wl_Combo_Box(self)
self.label_wstf_variant = QLabel(self.tr('Variant:'), self)
self.combo_box_wstf_variant = wl_boxes.Wl_Combo_Box(self)

self.combo_box_variant.addItems(['1', '2', '3', '4'])
self.combo_box_wstf_variant.addItems(['1', '2', '3', '4'])

self.group_box_wstf.setLayout(wl_layouts.Wl_Layout())
self.group_box_wstf.layout().addWidget(self.label_variant, 0, 0)
self.group_box_wstf.layout().addWidget(self.combo_box_variant, 0, 1)
self.group_box_wstf.layout().addWidget(self.label_wstf_variant, 0, 0)
self.group_box_wstf.layout().addWidget(self.combo_box_wstf_variant, 0, 1)

self.group_box_wstf.layout().setColumnStretch(2, 1)

self.setLayout(wl_layouts.Wl_Layout())
self.layout().addWidget(self.group_box_wstf, 0, 0)
self.layout().addWidget(self.group_box_re, 0, 0)
self.layout().addWidget(self.group_box_wstf, 1, 0)

self.layout().setContentsMargins(6, 4, 6, 4)
self.layout().setRowStretch(1, 1)
self.layout().setRowStretch(2, 1)

def load_settings(self, defaults = False):
    """Populate the readability widgets from the stored settings.

    Parameters:
        defaults: When true, values are taken from ``self.settings_default``;
            otherwise from ``self.settings_custom``.
    """
    # Read from a deep copy so the stored settings object is never aliased
    if defaults:
        settings = copy.deepcopy(self.settings_default)
    else:
        settings = copy.deepcopy(self.settings_custom)

    # Flesch Reading Ease
    self.combo_box_re_variant_nld.setCurrentText(settings['re']['variant_nld'])
    self.combo_box_re_variant_spa.setCurrentText(settings['re']['variant_spa'])

    # Wiener Sachtextformel
    # Only the renamed widget exists; the former combo_box_variant is gone
    self.combo_box_wstf_variant.setCurrentText(settings['wstf']['variant'])

def apply_settings(self):
    """Write the current widget selections back into ``self.settings_custom``.

    Returns:
        Always ``True``.
    """
    # Flesch Reading Ease
    self.settings_custom['re']['variant_nld'] = self.combo_box_re_variant_nld.currentText()
    self.settings_custom['re']['variant_spa'] = self.combo_box_re_variant_spa.currentText()

    # Wiener Sachtextformel
    # Only the renamed widget exists; the former combo_box_variant is gone
    self.settings_custom['wstf']['variant'] = self.combo_box_wstf_variant.currentText()

    return True

Expand Down

0 comments on commit 4a9fa51

Please sign in to comment.