add translator
This commit is contained in:
parent
709adbf507
commit
c9911c8abc
@ -0,0 +1,32 @@
|
||||
"""empty message
|
||||
|
||||
Revision ID: 55f95da68641
|
||||
Revises: 19fc4bee7a9f
|
||||
Create Date: 2025-06-21 20:51:15.097769
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '55f95da68641'
|
||||
down_revision: Union[str, None] = '19fc4bee7a9f'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: remove the ``audio_file`` column from ``glossary_word``."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name, column_name = 'glossary_word', 'audio_file'
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: add ``audio_file`` back to ``glossary_word`` as nullable TEXT."""
    # ### commands auto generated by Alembic - please adjust! ###
    audio_file_column = sa.Column(
        'audio_file',
        sa.TEXT(),
        autoincrement=False,
        nullable=True,
    )
    op.add_column('glossary_word', audio_file_column)
    # ### end Alembic commands ###
|
||||
@ -0,0 +1,32 @@
|
||||
"""empty message
|
||||
|
||||
Revision ID: 78357f437f61
|
||||
Revises: 55f95da68641
|
||||
Create Date: 2025-06-21 20:51:29.437692
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '78357f437f61'
|
||||
down_revision: Union[str, None] = '55f95da68641'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: add a nullable binary ``audio_file`` column to ``glossary_word``."""
    # ### commands auto generated by Alembic - please adjust! ###
    audio_file_column = sa.Column(
        'audio_file',
        sa.LargeBinary(),
        nullable=True,
    )
    op.add_column('glossary_word', audio_file_column)
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: remove the ``audio_file`` column from ``glossary_word``."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name, column_name = 'glossary_word', 'audio_file'
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###
|
||||
@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import datetime
|
||||
import enum
|
||||
|
||||
from sqlalchemy import BigInteger, Text, DateTime, Enum, func
|
||||
from sqlalchemy import BigInteger, Text, DateTime, Enum, func, LargeBinary
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
@ -37,6 +37,7 @@ class GlossaryWord(Base):
|
||||
term: Mapped[str] = mapped_column(
|
||||
Text(),
|
||||
nullable=False,
|
||||
unique=True,
|
||||
)
|
||||
language: Mapped[LanguageEnum] = mapped_column(
|
||||
Enum(LanguageEnum, native_enum=False),
|
||||
@ -75,8 +76,8 @@ class GlossaryWord(Base):
|
||||
Text(),
|
||||
nullable=True,
|
||||
)
|
||||
audio_file: Mapped[str | None] = mapped_column(
|
||||
Text(),
|
||||
audio_file: Mapped[bytes | None] = mapped_column(
|
||||
LargeBinary(),
|
||||
nullable=True,
|
||||
)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
|
||||
@ -5,39 +5,47 @@ import dataclasses
|
||||
import pydantic
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from greek_lang.languages import LanguageEnum
|
||||
from greek_lang.glossaries.models import LexicalCategoryEnum
|
||||
|
||||
|
||||
class WordInfo(pydantic.BaseModel):
|
||||
lemma: str = pydantic.Field(
|
||||
...,
|
||||
description="lemma (base form) - for verbs, use the 1st person singular in present indicative, "
|
||||
"for nouns and adjectives, use the nominative singular masculine (for adjectives)",
|
||||
)
|
||||
transcription: str = pydantic.Field(
|
||||
...,
|
||||
description="phonetic transcription in IPA",
|
||||
description="lemma phonetic transcription in IPA",
|
||||
)
|
||||
translation: str = pydantic.Field(
|
||||
...,
|
||||
description="translation in {target_language}",
|
||||
description="lemma translation in {target_language}",
|
||||
)
|
||||
description: str = pydantic.Field(
|
||||
...,
|
||||
description="description in {target_language}",
|
||||
description="lemma description in {target_language}",
|
||||
)
|
||||
part_of_speech: str = pydantic.Field(
|
||||
...,
|
||||
description="part of speech in {target_language}",
|
||||
description=f"part of speech, one of {[cat.value for cat in LexicalCategoryEnum]}",
|
||||
)
|
||||
example: str = pydantic.Field(
|
||||
...,
|
||||
description="example",
|
||||
description="lemma example",
|
||||
)
|
||||
example_transcription: str = pydantic.Field(
|
||||
...,
|
||||
description="phonetic transcription in IPA of an example",
|
||||
description="lemma phonetic transcription in IPA of an example",
|
||||
)
|
||||
example_translation: str = pydantic.Field(
|
||||
...,
|
||||
description="translation of the example in {target_language}",
|
||||
description="lemma translation of the example in {target_language}",
|
||||
)
|
||||
category: str = pydantic.Field(
|
||||
...,
|
||||
description="semantic category in {target_language}",
|
||||
description=f"semantic category in {{target_language}}",
|
||||
)
|
||||
etymology: str = pydantic.Field(
|
||||
...,
|
||||
@ -53,8 +61,8 @@ class OpenAiManager:
|
||||
self,
|
||||
*,
|
||||
word: str,
|
||||
source_lang: str,
|
||||
target_lang: str,
|
||||
source_lang: LanguageEnum,
|
||||
target_lang: LanguageEnum,
|
||||
model: str = "gpt-4o",
|
||||
) -> WordInfo:
|
||||
system_message = {
|
||||
@ -63,7 +71,7 @@ class OpenAiManager:
|
||||
}
|
||||
user_message = {
|
||||
"role": "user",
|
||||
"content": f'Provide detailed information about the word "{word}" in language {source_lang}, set {{target_language}} = {target_lang}.',
|
||||
"content": f'Provide detailed information about the word "{word}" in language {source_lang!s}, set {{target_language}} = {target_lang!s}.',
|
||||
}
|
||||
response = await self.client.beta.chat.completions.parse(
|
||||
model=model,
|
||||
|
||||
47
src/greek_lang/translator.py
Normal file
47
src/greek_lang/translator.py
Normal file
@ -0,0 +1,47 @@
|
||||
from dependency_injector.wiring import inject, Provide
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, AsyncSession
|
||||
|
||||
from greek_lang.audio.manager import get_pronunciation
|
||||
from greek_lang.database.container import DatabaseContainer
|
||||
from greek_lang.languages import LanguageEnum
|
||||
from greek_lang.openai_manager.container import OpenAiContainer
|
||||
from greek_lang.openai_manager.manager import OpenAiManager
|
||||
from greek_lang.glossaries.models import GlossaryWord, LexicalCategoryEnum
|
||||
|
||||
|
||||
@inject
async def translate(
    word: str,
    source_lang: LanguageEnum,
    target_lang: LanguageEnum = LanguageEnum.ru,
    note: str | None = None,
    tags: tuple[str, ...] = tuple(),
    open_ai_manager: OpenAiManager = Provide[OpenAiContainer.ai_manager],
    db_session_maker: async_sessionmaker[AsyncSession] = Provide[
        DatabaseContainer.async_session_maker,
    ],
) -> GlossaryWord:
    """Look up a word, fetch its pronunciation and store it as a glossary entry.

    Args:
        word: The word to look up, written in ``source_lang``.
        source_lang: Language of ``word``.
        target_lang: Language requested for the translation and descriptions.
        note: Optional free-form note stored with the entry.
        tags: Tags stored with the entry (persisted as a list).
        open_ai_manager: Injected manager used to obtain the word information.
        db_session_maker: Injected factory for the session that stores the
            entry.

    Returns:
        The ``GlossaryWord`` instance that was added to the session.

    Raises:
        ValueError: If the reported part of speech is not a valid
            ``LexicalCategoryEnum`` value.
    """
    word_response = await open_ai_manager.get_gpt_response(
        word=word,
        source_lang=source_lang,
        target_lang=target_lang,
    )

    # Convert before requesting audio or opening a transaction so that an
    # unsupported part of speech fails early.
    lexical_category = LexicalCategoryEnum(word_response.part_of_speech)

    # Pronounce the lemma: it is the text stored as ``term`` below, so the
    # audio must match it (previously a fixed sample word was sent instead).
    pronunciation = await get_pronunciation(
        text=word_response.lemma,
        source_lang=source_lang,
    )

    async with db_session_maker() as db_session, db_session.begin():
        glossary_word = GlossaryWord(
            term=word_response.lemma,
            language=source_lang.value,
            transcription=word_response.transcription,
            translation=word_response.translation,
            description=word_response.description,
            lexical_category=lexical_category,
            meaning_category=word_response.category,
            example=f"{word_response.example}({word_response.example_translation})",
            etymology=word_response.etymology,
            note=note,
            tags=list(tags),
            # presumably an in-memory bytes buffer; getvalue() yields the raw
            # audio bytes for the binary column - TODO confirm
            audio_file=pronunciation.getvalue(),
        )
        db_session.add(glossary_word)

    # NOTE(review): if the session maker uses expire_on_commit=True, attributes
    # of the returned instance may be expired once the session is closed -
    # confirm against the container configuration.
    return glossary_word
|
||||
Loading…
Reference in New Issue
Block a user