dailp/morpheme.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
use crate::*;
use async_graphql::FieldResult;
use serde::{Deserialize, Serialize};
use sqlx::postgres::{PgHasArrayType, PgTypeInfo};
use std::borrow::Cow;
/// A single unit of meaning and its corresponding English gloss.
///
/// Serialized with camelCase field names. The `system` and `gloss_id` fields
/// are marked `#[serde(skip)]`, so they take no part in (de)serialization.
#[derive(Serialize, Clone, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct WordSegment {
/// Which Cherokee representation system is this segment written with?
///
/// When this is `None`, `get_morpheme` hands back the stored text without
/// converting it.
#[serde(skip)]
pub system: Option<CherokeeOrthography>,
/// Source language representation of this segment.
pub morpheme: String,
/// Target language representation of this segment.
pub gloss: String,
/// Database ID for the associated gloss, which may be shared by several
/// morphemes in the same document.
#[serde(skip)]
pub gloss_id: Option<Uuid>,
/// What kind of thing is this segment?
///
/// This field determines what character separates this segment from the
/// previous one when reconstituting a full segmentation string:
/// `gloss_layer` writes the separator picked by `get_previous_separator`
/// in front of every segment except the first.
pub role: WordSegmentRole,
/// Optional glossary entry for this segment which gives further information,
/// like a definition and example usages.
pub matching_tag: Option<MorphemeTag>,
}
/// The kind of segment that a particular sequence of characters in a morphemic
/// segmentation represents.
// The separator named on each variant is the one returned for that role by
// `WordSegment::get_previous_separator`.
// NOTE(review): the `///` text on this type is believed to be published as the
// GraphQL enum description by `async_graphql::Enum` — confirm before rewording.
#[derive(
Clone, Copy, Debug, Serialize, Deserialize, sqlx::Type, async_graphql::Enum, PartialEq, Eq,
)]
#[sqlx(type_name = "word_segment_role")]
pub enum WordSegmentRole {
/// Separated by a hyphen '-'
Morpheme,
/// Separated by an equals sign '='
Clitic,
/// Separated by a colon ':'
Modifier,
}
/// Array support for [`WordSegmentRole`] in sqlx's Postgres driver.
///
/// Both methods forward to the `&str` implementation, so an array of roles
/// reports the same array type info, and accepts the same column types, as an
/// array of `&str` does.
// NOTE(review): presumably role arrays are exchanged with the database as text
// arrays rather than as `word_segment_role[]` — confirm against the queries
// that bind or decode `Vec<WordSegmentRole>`.
impl PgHasArrayType for WordSegmentRole {
// Type info used when a slice/Vec of roles is bound or decoded.
fn array_type_info() -> PgTypeInfo {
<&str as PgHasArrayType>::array_type_info()
}
// Whether a column of type `ty` may be decoded as an array of roles.
fn array_compatible(ty: &PgTypeInfo) -> bool {
<&str as PgHasArrayType>::array_compatible(ty)
}
}
impl WordSegment {
/// Make a new morpheme segment
pub fn new(morpheme: String, gloss: String, role: Option<WordSegmentRole>) -> Self {
Self {
system: None,
morpheme,
gloss,
// FIXME Shortcut to keep this function the same while allowing
// migration code to create this data structure.
gloss_id: None,
role: role.unwrap_or(WordSegmentRole::Morpheme),
matching_tag: None,
}
}
/// Parse all segments from a raw interlinear morphemic segmentation.
/// The first argument is the segmented source, while the second argument is
/// the target language gloss of each segment.
pub fn parse_many(morpheme_layer: &str, gloss_layer: &str) -> Option<Vec<Self>> {
let (_, result) = parse_gloss_layers(morpheme_layer, gloss_layer).ok()?;
Some(result)
}
/// The separator that should follow this segment, based on the type of the
/// next segment.
pub fn get_previous_separator(&self) -> &str {
use WordSegmentRole::*;
match self.role {
Morpheme => "-",
Clitic => "=",
Modifier => ":",
}
}
/// Build a string of the morpheme gloss line, used in interlinear gloss
/// text (IGT).
pub fn gloss_layer<'a>(segments: impl IntoIterator<Item = &'a WordSegment>) -> String {
use itertools::Itertools;
segments
.into_iter()
.enumerate()
.flat_map(|(index, s)| {
vec![
if index > 0 {
s.get_previous_separator()
} else {
""
},
&*s.gloss,
]
})
.join("")
}
/// Convert the source representation of this segment into the given
/// phonemic writing system.
pub fn get_morpheme(&self) -> Cow<'_, str> {
match self.system {
Some(orthography) => Cow::Owned(orthography.convert(&self.morpheme)),
_ => Cow::Borrowed(&*self.morpheme),
}
}
}
#[async_graphql::Object]
impl WordSegment {
/// Phonemic representation of the morpheme
async fn morpheme(&self) -> Cow<'_, str> {
self.get_morpheme()
}
/// English gloss in standard DAILP format that refers to a lexical item
async fn gloss(&self) -> &str {
&self.gloss
}
/// What kind of thing is this segment?
async fn role(&self) -> WordSegmentRole {
self.role
}
/// This field determines what character should separate this segment from
/// the previous one when reconstituting the full segmentation string.
async fn previous_separator(&self) -> &str {
self.get_previous_separator()
}
/// If this morpheme represents a functional tag that we have further
/// information on, this is the corresponding database entry.
async fn matching_tag(
&self,
context: &async_graphql::Context<'_>,
) -> FieldResult<Option<MorphemeTag>> {
use async_graphql::dataloader::*;
if let Some(matching_tag) = &self.matching_tag {
Ok(Some(matching_tag.clone()))
} else if let Some(gloss_id) = self.gloss_id {
Ok(context
.data::<DataLoader<Database>>()?
.load_one(TagForMorpheme(
gloss_id,
self.system.unwrap_or(CherokeeOrthography::Taoc),
))
.await?)
} else {
Ok(None)
}
}
}
/// A single unit of meaning and its gloss which can be edited.
// Carries the `system`, `morpheme`, `gloss` and `role` fields of `WordSegment`;
// `gloss_id` and `matching_tag` have no counterpart on this input type.
#[derive(async_graphql::InputObject)]
pub struct MorphemeSegmentUpdate {
/// Which Cherokee representation system is this segment written with?
pub system: Option<CherokeeOrthography>,
/// Source language representation of this segment.
pub morpheme: String,
/// Target language representation of this segment.
pub gloss: String,
/// This field determines what character should separate this segment from
/// the next one when reconstituting the full segmentation string.
// NOTE(review): `WordSegment::gloss_layer` writes a segment's separator in
// front of that segment, i.e. between it and the *previous* one. The "next
// one" wording above may be stale — confirm how this input is applied before
// changing the published description.
pub role: WordSegmentRole,
}