/**
 * One publication or dataset record.
 *
 * @typedef {Object} Research
 * @property {string} feature - Top-level grouping; values used in this list are
 *   "Speech", "Natural Language Processing (NLP)" and "Data".
 * @property {string[]} tags - Topic labels; in every entry the first tag
 *   repeats the grouping ("Speech", "NLP" or "Data").
 * @property {string} title - Paper or dataset title.
 * @property {string} description - Short summary; abstract excerpts are cut
 *   off with a trailing ellipsis.
 * @property {string} date - Free-form date text, either a year ("2021") or a
 *   month plus year ("June 2021"). Not an ISO date.
 * @property {string} specialNote - Venue / publication note; an empty string
 *   when there is nothing to show.
 * @property {string|undefined} link - URL of the paper or dataset; `undefined`
 *   for entries marked "(soon)" that have no public link yet.
 */

/**
 * Static list of research papers and datasets, grouped by `feature`
 * (Speech first, then NLP, then Data).
 *
 * NOTE(review): presumably rendered by a UI listing page; consumers must
 * handle `link === undefined` and `specialNote === ""` — confirm at call sites.
 *
 * @type {Research[]}
 */
export const researches = [
  // ---------------------------------------------------------------- Speech
  {
    feature: "Speech",
    tags: ["Speech", "Text-to-Speech", "Patent"],
    title:
      "Spectral and Latent Speech Representation Distortion for TTS Evaluation",
    description:
      "One of the main problems in the development of text-to-speech (TTS) systems is its reliance on subjective measures, typically the Mean Opinion Score (MOS). MOS requires a large number of people to reliably rate each utterance …",
    date: "2021",
    specialNote: "Interspeech 2021 (top Speech conference)",
    link: "https://www.isca-speech.org/archive/pdfs/interspeech_2021/kongthaworn21_interspeech.pdf",
    // Alternate (Google Scholar) location of the same paper, kept for reference:
    // link: 'https://scholar.google.com/citations?view_op=view_citation&hl=en&user=ST-jPeYAAAAJ&cstart=20&pagesize=80&sortby=pubdate&citation_for_view=ST-jPeYAAAAJ:4JMBOYKVnBMC'
  },
  {
    feature: "Speech",
    tags: [
      "Speech",
      "Automatic Speech Recognition",
      "Semi-Supervised Learning",
    ],
    title: "Word-level Confidence Estimation for CTC Models",
    description:
      "Measuring confidence in Automatic Speech Recognition (ASR) is important for ensuring the reliability of downstream applications. Previous works proposed Confidence Estimation Module (CEM) for predicting …",
    date: "2023",
    specialNote: "Interspeech 2023 (top Speech conference) (soon)",
    link: undefined,
  },
  {
    feature: "Speech",
    tags: ["Speech", "Voice Biometrics", "Multilingual", "State-of-the-art"],
    title: "Instance-based Temporal Normalization for Speaker Verification",
    description:
      "One of the challenges in speaker verification is domain mismatch and other effects such as language and emotion. Normalization techniques such as Batch Normalization (BN) have been proven effective in improving neural …",
    date: "2023",
    specialNote: "Interspeech 2023 (top Speech conference) (soon)",
    link: undefined,
  },
  {
    feature: "Speech",
    tags: ["Speech", "Automatic Speech Recognition", "Thai Dialect", "Data"],
    title:
      "Thai Dialect Corpus and Transfer-based Curriculum Learning Investigation for Dialect Automatic Speech Recognition",
    description:
      "We release 840 hours of read speech multi-dialect ASR corpora consisting of 700 hours of main Thai dialect, named Thai-central, and 40 hours for each local dialect, named Thai-dialect, with transcripts and their translations to Thai …",
    date: "2023",
    specialNote: "Interspeech 2023 (top Speech conference) (soon)",
    link: undefined,
  },
  {
    feature: "Speech",
    tags: ["Speech", "Emotion Detection"],
    title:
      "An Application for Mental Health Monitoring using Facial, Voice, and Questionnaire Information",
    description:
      "Depression is a major societal issue. However, depression can be hard to self-diagnose, and people suffering from depression often hesitate to consult with professionals. We discuss the design and initial testings of our prototype …",
    date: "2023",
    specialNote: "AAAI Summer Symposium AI x Metaverse 2023 (soon)",
    link: undefined,
  },
  {
    feature: "Speech",
    tags: ["Speech", "Cloud"],
    title:
      "0.01 Cent per Second: Developing a Cloud-based Cost-effective Audio Transcription System for an Online Video Learning Platform",
    description:
      "Using automatic speech recognition (ASR) to transcribe videos in an online video learning platform can benefit learners in multiple ways. However, existing speech-to-text APIs can be costly to use, especially for long lecture videos commonly found …",
    date: "2023",
    specialNote:
      "Joint Conference on Computer Science and Software Engineering 2023 (JCSSE) (soon)",
    link: undefined,
  },
  {
    feature: "Speech",
    tags: ["Speech", "Automatic Speech Recognition"],
    title:
      "Reducing spelling inconsistencies in code-switching ASR using contextualized CTC loss",
    description:
      "Code-Switching (CS) remains a challenge for Automatic Speech Recognition (ASR), especially character-based models. With the combined choice of characters from multiple languages, the outcome from character-based models …",
    date: "June 2021",
    specialNote:
      "IEEE International Conference on Acoustics, Speech and Signal Processing 2021 (ICASSP, top Speech conference)",
    link: "https://arxiv.org/pdf/2005.07920.pdf",
  },

  // ------------------------------------------- Natural Language Processing
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Information Retrieval"],
    title: "Typo-Robust Representation Learning for Dense Retrieval",
    description:
      "Dense retrieval is a basic building block of information retrieval applications. One of the main challenges of dense retrieval in real-world settings is the handling of queries containing misspelled words…",
    date: "June 2023",
    specialNote:
      "Association for Computational Linguistics 2023 (ACL, top NLP conference)",
    link: "https://arxiv.org/abs/2306.10348",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP"],
    title:
      "ConGen: Unsupervised Control and Generalization Distillation For Sentence Representation",
    description:
      "Sentence representations are essential in many NLP tasks operating at the sentence level. Recently, research attention has shifted towards learning how to represent sentences without any annotations …",
    date: "December 2022",
    specialNote:
      "Empirical Methods in Natural Language Processing 2022 (EMNLP, top NLP conference)",
    link: "https://aclanthology.org/2022.findings-emnlp.483.pdf",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP"],
    title:
      "Mitigating Spurious Correlation in Natural Language Understanding with Counterfactual Inference",
    description:
      "Despite their promising results on standard benchmarks, NLU models are still prone to make predictions based on shortcuts caused by unintended bias in the dataset. For example, an NLI model may use lexical overlap as …",
    date: "December 2022",
    specialNote:
      "Empirical Methods in Natural Language Processing 2022 (EMNLP, top NLP conference)",
    link: "https://aclanthology.org/2022.emnlp-main.777.pdf",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Information Retrieval"],
    title:
      "Cl-relkt: Cross-lingual language knowledge transfer for multilingual retrieval question answering",
    description:
      "Cross-Lingual Retrieval Question Answering (CL-ReQA) is concerned with retrieving answer documents or passages to a question written in a different language. A common approach to CL-ReQA is to create a multilingual sentence …",
    date: "July 2022",
    specialNote:
      "North American Chapter of the Association for Computational Linguistics 2022 (NAACL, top NLP conference)",
    link: "https://aclanthology.org/2022.findings-naacl.165.pdf",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Tokenization"],
    title: "Tokenization-based data augmentation for text classification",
    description:
      "Tokenization is one of the most important data preprocessing steps in the text classification task and also one of the main contributing factors in the model performance. However, getting good tokenizations …",
    date: "June 2022",
    specialNote:
      "Joint Conference on Computer Science and Software Engineering 2022 (JCSSE)",
    link: "https://ieeexplore.ieee.org/abstract/document/9836268",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Information Retrieval"],
    title:
      "Robust Fragment-Based Framework for Cross-lingual Sentence Retrieval",
    description:
      "Cross-lingual Sentence Retrieval (CLSR) aims at retrieving parallel sentence pairs that are translations of each other from a multilingual set of comparable documents. The retrieved parallel sentence pairs can be used in other downstream NLP tasks …",
    date: "November 2021",
    // Venue corrected from ACL to EMNLP: the linked PDF is
    // "2021.findings-emnlp.80" and the date is November 2021.
    specialNote:
      "Empirical Methods in Natural Language Processing 2021 (EMNLP, top NLP conference)",
    link: "https://wlv.openrepository.com/bitstream/handle/2436/624330/2021.findings-emnlp.80.pdf?sequence=6&isAllowed=y",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Word Segmentation"],
    title: "Handling cross and out-of-domain samples in Thai word segmentation",
    description:
      "While word segmentation is a solved problem in many languages, it is still a challenge in continuous-script or low-resource languages. Like other NLP tasks, word segmentation is domain-dependent, which can be a challenge in low-resource languages like Thai …",
    date: "August 2021",
    specialNote:
      "Association for Computational Linguistics 2021 (ACL, top NLP conference)",
    link: "https://wlv.openrepository.com/bitstream/handle/2436/624145/2021.findings-acl.86.pdf?sequence=5",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Spelling Correction"],
    title:
      "Thai spelling correction and word normalization on social text using a two-stage pipeline with neural contextual attention",
    description:
      "Text correction systems (e.g., spell checkers) have been used to improve the quality of computerized text by detecting and correcting errors. However, the task of performing spelling correction and word normalization (text correction) for Thai …",
    date: "July 2021",
    specialNote: "IEEE Access",
    link: "https://ieeexplore.ieee.org/iel7/6287639/8948470/09145483.pdf",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Text Captioning"],
    title: "Set prediction in the latent space",
    description:
      "Set prediction tasks require the matching between predicted set and ground truth set in order to propagate the gradient signal. Recent works have performed this matching in the original feature space thus requiring …",
    date: "December 2021",
    specialNote:
      "Neural Information Processing Systems 2021 (Neurips, top Machine Learning conference)",
    link: "https://proceedings.neurips.cc/paper_files/paper/2021/file/d61e9e58ae1058322bc169943b39f1d8-Paper.pdf",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP"],
    title:
      "A comparative study of pretrained language models for automated essay scoring with adversarial inputs",
    description:
      "Automated Essay Scoring (AES) is a task that deals with grading written essays automatically without human intervention. This study compares the performance of three AES models which utilize different text embedding methods …",
    date: "November 2020",
    specialNote: "IEEE TENCON 2020",
    link: "https://ieeexplore.ieee.org/abstract/document/9293930",
  },
  {
    feature: "Natural Language Processing (NLP)",
    tags: ["NLP", "Sentence Segmentation"],
    title:
      "Semi-supervised Thai Sentence segmentation using local and distant word representations",
    description:
      "A sentence is typically treated as the minimal syntactic unit used for extracting valuable information from a longer piece of text. However, in written Thai, there are no explicit sentence markers. We proposed a deep learning model …",
    date: "August 2019",
    specialNote: "Engineering Journal",
    // FIXME: this URL is identical to the IEEE Access link of the
    // "Thai spelling correction ..." entry above, so it most likely points to
    // the wrong paper. Replace with the Engineering Journal link once known.
    link: "https://ieeexplore.ieee.org/iel7/6287639/8948470/09145483.pdf",
  },

  // ------------------------------------------------------------------ Data
  {
    feature: "Data",
    tags: ["Data", "Crowdsourcing", "Multilingual"],
    title: "Crowdsourced Data Validation for ASR Training",
    description:
      "Many ASR engines are based on crowdsourced speech corpora, such as Common Voice. Although crowdsourced data is inexpensive, the utterances obtained from crowdsourcing can be noisy because of uncontrollable factors such as accents, environments, …",
    date: "2023",
    specialNote: "Interspeech 2023 (soon)",
    link: undefined,
  },
  {
    feature: "Data",
    tags: ["Data", "Open-sourced", "Smart Home"],
    title: "Gowajee Corpus",
    description:
      "There are 17597 utterances collected from 166 speakers. 142 are males, while 24 are females. The total length of the corpus is 14 hours and 40 minutes. The vocabulary size is 2129 words with a total of 98253 words.",
    date: "2021",
    specialNote: "",
    link: "https://github.com/ekapolc/gowajee_corpus",
  },
  {
    feature: "Data",
    tags: ["Data", "Open-sourced", "Emotion Detection"],
    title: "Thai Speech Emotion Dataset",
    description: "Thai Speech Emotion Dataset",
    date: "March 2021",
    specialNote: "",
    link: "https://airesearch.in.th/releases/speech-emotion-dataset/",
  },
  {
    feature: "Data",
    tags: ["Data", "Open-sourced"],
    title: "CommonVoice",
    description: "The open-sourced dataset",
    date: "2020",
    specialNote: "",
    link: "https://commonvoice.mozilla.org/th/datasets",
  },
];
