add translator
This commit is contained in:
parent
709adbf507
commit
c9911c8abc
@ -0,0 +1,32 @@
|
|||||||
|
"""empty message
|
||||||
|
|
||||||
|
Revision ID: 55f95da68641
|
||||||
|
Revises: 19fc4bee7a9f
|
||||||
|
Create Date: 2025-06-21 20:51:15.097769
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = '55f95da68641'
|
||||||
|
down_revision: Union[str, None] = '19fc4bee7a9f'
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Drop the ``audio_file`` column from the ``glossary_word`` table."""
    table_name = 'glossary_word'
    column_name = 'audio_file'
    op.drop_column(table_name, column_name)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Re-add the nullable TEXT ``audio_file`` column to ``glossary_word``.

    Only the column definition is recreated; nothing here repopulates it.
    """
    audio_file_column = sa.Column(
        'audio_file',
        sa.TEXT(),
        autoincrement=False,
        nullable=True,
    )
    op.add_column('glossary_word', audio_file_column)
|
||||||
@ -0,0 +1,32 @@
|
|||||||
|
"""empty message
|
||||||
|
|
||||||
|
Revision ID: 78357f437f61
|
||||||
|
Revises: 55f95da68641
|
||||||
|
Create Date: 2025-06-21 20:51:29.437692
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = '78357f437f61'
|
||||||
|
down_revision: Union[str, None] = '55f95da68641'
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add a nullable binary ``audio_file`` column to ``glossary_word``."""
    audio_file_column = sa.Column(
        'audio_file',
        sa.LargeBinary(),
        nullable=True,
    )
    op.add_column('glossary_word', audio_file_column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Remove the binary ``audio_file`` column from ``glossary_word``."""
    table_name, column_name = 'glossary_word', 'audio_file'
    op.drop_column(table_name, column_name)
|
||||||
@ -3,7 +3,7 @@ from __future__ import annotations
|
|||||||
import datetime
|
import datetime
|
||||||
import enum
|
import enum
|
||||||
|
|
||||||
from sqlalchemy import BigInteger, Text, DateTime, Enum, func
|
from sqlalchemy import BigInteger, Text, DateTime, Enum, func, LargeBinary
|
||||||
from sqlalchemy.dialects.postgresql import ARRAY
|
from sqlalchemy.dialects.postgresql import ARRAY
|
||||||
from sqlalchemy.orm import Mapped, mapped_column
|
from sqlalchemy.orm import Mapped, mapped_column
|
||||||
|
|
||||||
@ -37,6 +37,7 @@ class GlossaryWord(Base):
|
|||||||
term: Mapped[str] = mapped_column(
|
term: Mapped[str] = mapped_column(
|
||||||
Text(),
|
Text(),
|
||||||
nullable=False,
|
nullable=False,
|
||||||
|
unique=True,
|
||||||
)
|
)
|
||||||
language: Mapped[LanguageEnum] = mapped_column(
|
language: Mapped[LanguageEnum] = mapped_column(
|
||||||
Enum(LanguageEnum, native_enum=False),
|
Enum(LanguageEnum, native_enum=False),
|
||||||
@ -75,8 +76,8 @@ class GlossaryWord(Base):
|
|||||||
Text(),
|
Text(),
|
||||||
nullable=True,
|
nullable=True,
|
||||||
)
|
)
|
||||||
audio_file: Mapped[str | None] = mapped_column(
|
audio_file: Mapped[bytes | None] = mapped_column(
|
||||||
Text(),
|
LargeBinary(),
|
||||||
nullable=True,
|
nullable=True,
|
||||||
)
|
)
|
||||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||||
|
|||||||
@ -5,39 +5,47 @@ import dataclasses
|
|||||||
import pydantic
|
import pydantic
|
||||||
from openai import AsyncOpenAI
|
from openai import AsyncOpenAI
|
||||||
|
|
||||||
|
from greek_lang.languages import LanguageEnum
|
||||||
|
from greek_lang.glossaries.models import LexicalCategoryEnum
|
||||||
|
|
||||||
|
|
||||||
class WordInfo(pydantic.BaseModel):
|
class WordInfo(pydantic.BaseModel):
|
||||||
|
lemma: str = pydantic.Field(
|
||||||
|
...,
|
||||||
|
description="lemma (base form) - for verbs, use the 1st person singular in present indicative, "
|
||||||
|
"for nouns and adjectives, use the nominative singular masculine (for adjectives)",
|
||||||
|
)
|
||||||
transcription: str = pydantic.Field(
|
transcription: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="phonetic transcription in IPA",
|
description="lemma phonetic transcription in IPA",
|
||||||
)
|
)
|
||||||
translation: str = pydantic.Field(
|
translation: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="translation in {target_language}",
|
description="lemma translation in {target_language}",
|
||||||
)
|
)
|
||||||
description: str = pydantic.Field(
|
description: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="description in {target_language}",
|
description="lemma description in {target_language}",
|
||||||
)
|
)
|
||||||
part_of_speech: str = pydantic.Field(
|
part_of_speech: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="part of speech in {target_language}",
|
description=f"part of speech, one of {[cat.value for cat in LexicalCategoryEnum]}",
|
||||||
)
|
)
|
||||||
example: str = pydantic.Field(
|
example: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="example",
|
description="lemma example",
|
||||||
)
|
)
|
||||||
example_transcription: str = pydantic.Field(
|
example_transcription: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="phonetic transcription in IPA of an example",
|
description="lemma phonetic transcription in IPA of an example",
|
||||||
)
|
)
|
||||||
example_translation: str = pydantic.Field(
|
example_translation: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="translation of the example in {target_language}",
|
description="lemma translation of the example in {target_language}",
|
||||||
)
|
)
|
||||||
category: str = pydantic.Field(
|
category: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
description="semantic category in {target_language}",
|
description=f"semantic category in {{target_language}}",
|
||||||
)
|
)
|
||||||
etymology: str = pydantic.Field(
|
etymology: str = pydantic.Field(
|
||||||
...,
|
...,
|
||||||
@ -53,8 +61,8 @@ class OpenAiManager:
|
|||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
word: str,
|
word: str,
|
||||||
source_lang: str,
|
source_lang: LanguageEnum,
|
||||||
target_lang: str,
|
target_lang: LanguageEnum,
|
||||||
model: str = "gpt-4o",
|
model: str = "gpt-4o",
|
||||||
) -> WordInfo:
|
) -> WordInfo:
|
||||||
system_message = {
|
system_message = {
|
||||||
@ -63,7 +71,7 @@ class OpenAiManager:
|
|||||||
}
|
}
|
||||||
user_message = {
|
user_message = {
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f'Provide detailed information about the word "{word}" in language {source_lang}, set {{target_language}} = {target_lang}.',
|
"content": f'Provide detailed information about the word "{word}" in language {source_lang!s}, set {{target_language}} = {target_lang!s}.',
|
||||||
}
|
}
|
||||||
response = await self.client.beta.chat.completions.parse(
|
response = await self.client.beta.chat.completions.parse(
|
||||||
model=model,
|
model=model,
|
||||||
|
|||||||
47
src/greek_lang/translator.py
Normal file
47
src/greek_lang/translator.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from dependency_injector.wiring import inject, Provide
|
||||||
|
from sqlalchemy.ext.asyncio import async_sessionmaker, AsyncSession
|
||||||
|
|
||||||
|
from greek_lang.audio.manager import get_pronunciation
|
||||||
|
from greek_lang.database.container import DatabaseContainer
|
||||||
|
from greek_lang.languages import LanguageEnum
|
||||||
|
from greek_lang.openai_manager.container import OpenAiContainer
|
||||||
|
from greek_lang.openai_manager.manager import OpenAiManager
|
||||||
|
from greek_lang.glossaries.models import GlossaryWord, LexicalCategoryEnum
|
||||||
|
|
||||||
|
|
||||||
|
@inject
async def translate(
    word: str,
    source_lang: LanguageEnum,
    target_lang: LanguageEnum = LanguageEnum.ru,
    note: str | None = None,
    tags: tuple[str, ...] = tuple(),
    open_ai_manager: OpenAiManager = Provide[OpenAiContainer.ai_manager],
    db_session_maker: async_sessionmaker[AsyncSession] = Provide[
        DatabaseContainer.async_session_maker,
    ],
) -> GlossaryWord:
    """Look up ``word`` via the OpenAI manager and store it as a glossary entry.

    Args:
        word: The word to look up, written in ``source_lang``.
        source_lang: Language of ``word``.
        target_lang: Language requested for the translation and descriptions.
        note: Optional free-form note stored with the entry.
        tags: Tags stored with the entry (saved as a list).
        open_ai_manager: Injected manager used to query the model.
        db_session_maker: Injected factory for async database sessions.

    Returns:
        The ``GlossaryWord`` that was added to the session.

    Raises:
        ValueError: If the model's ``part_of_speech`` is not a valid
            ``LexicalCategoryEnum`` value.
    """
    word_response = await open_ai_manager.get_gpt_response(
        word=word,
        source_lang=source_lang,
        target_lang=target_lang,
    )

    # Validate the model's answer before requesting audio, so a bad
    # part-of-speech fails fast without a wasted pronunciation call.
    lexical_category = LexicalCategoryEnum(word_response.part_of_speech)

    # Pronounce the lemma that is stored as the entry's term (previously a
    # hard-coded debug word was sent, giving every entry the same audio).
    pronunciation = await get_pronunciation(
        text=word_response.lemma,
        source_lang=source_lang,
    )

    # The session block commits on success and rolls back on error.
    async with db_session_maker() as db_session, db_session.begin():
        glossary_word = GlossaryWord(
            term=word_response.lemma,
            # The column is mapped as LanguageEnum, so pass the member itself
            # rather than its raw ``.value``.
            language=source_lang,
            transcription=word_response.transcription,
            translation=word_response.translation,
            description=word_response.description,
            lexical_category=lexical_category,
            meaning_category=word_response.category,
            example=f"{word_response.example}({word_response.example_translation})",
            etymology=word_response.etymology,
            note=note,
            tags=list(tags),
            audio_file=pronunciation.getvalue(),
        )
        db_session.add(glossary_word)
    # NOTE(review): the instance is returned after the session is closed; if
    # the session maker uses expire_on_commit=True, attribute access by the
    # caller may fail on the detached object - confirm the configuration.
    return glossary_word
|
||||||
Loading…
Reference in New Issue
Block a user