A Kernel Hybridization NGram-Okapi for Indexing and Classification of Arabic Documents
Cited by
Export citation
- BibTex
- RIS
- TXT
@comment{JICS-9-141: journal article exported from the global-sci.org article
  page. The doi field is intentionally absent because the export supplied only
  the resolver prefix "https://doi.org/" with no identifier; add the bare DOI
  (without resolver prefix) once it is known.}
@article{JICS-9-141,
  author   = {Zaki, Taher and Mammass, Driss and Ennaji, Abdellatif and Nicolas, St{\'e}phane},
  title    = {A Kernel Hybridization {NGram-Okapi} for Indexing and Classification of {Arabic} Documents},
  journal  = {Journal of Information and Computing Science},
  year     = {2024},
  volume   = {9},
  number   = {2},
  pages    = {141--153},
  abstract = {In this paper, we propose a hybrid system for contextual and semantic indexing of Arabic
documents, bringing an improvement to classical models based on n-grams and the Okapi model. This new
approach takes into account the concept of the semantic vicinity of terms. We proceed in fact by the
calculation of similarity between words using an hybridization of NGRAMs-OKAPI statistical measures
and a kernel function in order to identify relevant descriptors. Terminological resources such as graphs and
semantic dictionaries are integrated into the system to improve the indexing and the classification processes.},
  issn     = {1746-7659},
  url      = {http://global-sci.org/intro/article_detail/jics/22591.html},
}
TY  - JOUR
T1  - A Kernel Hybridization NGram-Okapi for Indexing and Classification of Arabic Documents
AU  - Zaki, Taher
AU  - Mammass, Driss
AU  - Ennaji, Abdellatif
AU  - Nicolas, Stéphane
JO  - Journal of Information and Computing Science
VL  - 9
IS  - 2
SP  - 141
EP  - 153
PY  - 2024
DA  - 2024/01
SN  - 1746-7659
UR  - https://global-sci.org/intro/article_detail/jics/22591.html
KW  - Arabic documents
KW  - classification
KW  - indexing
KW  - kernel function
KW  - n-grams
KW  - okapi
AB  - In this paper, we propose a hybrid system for contextual and semantic indexing of Arabic documents, bringing an improvement to classical models based on n-grams and the Okapi model. This new approach takes into account the concept of the semantic vicinity of terms. We proceed in fact by the calculation of similarity between words using an hybridization of NGRAMs-OKAPI statistical measures and a kernel function in order to identify relevant descriptors. Terminological resources such as graphs and semantic dictionaries are integrated into the system to improve the indexing and the classification processes.
ER  - 
Taher Zaki, Driss Mammass, Abdellatif Ennaji and Stéphane Nicolas. (2024). A Kernel Hybridization NGram-Okapi for Indexing and Classification of Arabic Documents.
Journal of Information and Computing Science. 9 (2).
141-153.
doi:
Copy to clipboard