import os

from langchain_openai import OpenAIEmbeddings
# Shared embedding client; reads OPENAI_API_KEY from the environment.
embedding = OpenAIEmbeddings()

# Warm-up / exploratory call; the returned vector is discarded.
embedding.embed_query('트럼프')

# Reference vectors, compared later via cosine similarity.
Trump, Elon = (embedding.embed_query(q) for q in ('Donald Trump', 'Elon Reeve Musk'))
from langchain_core.output_parsers import PydanticOutputParser
# NOTE(review): SentimentArticle is defined further down in this file (the
# scraped notebook cells are out of order); executed top-to-bottom this line
# raises NameError until that class definition has run — confirm intended order.
parser = PydanticOutputParser(pydantic_object=SentimentArticle)
# Prompt: classify a news article's sentiment (긍정/부정/중립) and emit JSON
# matching the parser's format instructions.
prompt_template = """
당신은 뉴스 기사의 감성을 분석하는 AI입니다.
아래 뉴스를 읽고 감성을 '긍정', '부정', '중립' 중 하나로 분류하고, 감성 점수를 json 형식으로 출력하세요.
{format_instructions}
뉴스 기사:
{news_article}
"""
from pydantic import BaseModel, RootModel, Field, ValidationError
from typing import List
class PersonInfo(BaseModel):
    """One person record: name and age (descriptions are in Korean)."""

    # Person's name.
    name: str = Field(description='사람의 이름')
    # Person's age.
    age: int = Field(description="사람의 나이")
class PeopleList(RootModel[List[PersonInfo]]):
    """List-shaped root model of PersonInfo records.

    The element type is already fixed by the ``RootModel[List[PersonInfo]]``
    parameter; the original redundantly re-annotated ``root`` with the same
    type, which is removed here (behavior unchanged in pydantic v2).
    """
# Sample payload validated against the list-root model (result discarded;
# raises ValidationError on bad input).
data = [dict(name='서찬웅', age=20)]
PeopleList.model_validate(data)
# BUG FIX: the original read "from typing import Literaㅣ" — the final character
# was U+3163 (Hangul jamo ㅣ), not the Latin letter 'l', causing an ImportError.
from typing import Literal


class SentimentArticle(BaseModel):
    """Structured sentiment-analysis result for one news article."""

    # Sentiment label: positive / negative / neutral (Korean literals).
    sentiment: Literal['긍정', '부정', '중립'] = Field(description="감성분석 분류")
    # Sentiment score.
    score: float = Field(description='감성분석 점수')
    # ~200-character summary of the article.
    summary: str = Field(description="글의 내용을 200자로 요약")
from dotenv import load_dotenv
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()
from langchain_openai import ChatOpenAI
model = "gpt-5-mini-2025-08-07"
# NOTE(review): some newer OpenAI models reject non-default temperature values —
# confirm temperature=0 is accepted for this model before relying on it.
llm = ChatOpenAI(
    temperature=0,
    model_name= model
)
from langchain_core.prompts import PromptTemplate

# Prompt with the article text as the sole runtime variable; the parser's
# format instructions are baked in up front as a partial.
prompt = PromptTemplate(
    input_variables=['news_article'],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=prompt_template,
)

# article text -> prompt -> LLM -> SentimentArticle instance
chain = prompt | llm | parser
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://n.news.naver.com/mnews/hotissue/article/015/0005227494?type=series&cid=2003130")
# BUG FIX: the original called loader.load() twice (once discarding the result),
# issuing two HTTP fetches of the same page. Fetch once and reuse.
docs = loader.load()
# Run the sentiment chain on the first document's stripped text.
result = chain.invoke(docs[0].page_content.strip())
result.score
import zipfile

# Extract the labelled medical-QA archive into ./data2.
path = "data/09.필수의료 의학지식 데이터/3.개방데이터/1.데이터/Training/02.라벨링데이터/TL_내과.zip"
with zipfile.ZipFile(path, "r") as f:
    f.extractall("./data2")

# utf-8-sig strips a BOM if the file has one.
with open("./data2/필수_11.json", 'r', encoding='utf-8-sig') as f:
    text = f.read()

import json
# BUG FIX: the original discarded json.loads(text), yet later code reads
# text_json['question'] / text_json['answer'] — keep the parsed record.
text_json = json.loads(text)
class Summary(BaseModel):
    """Structured summary of a disease-related QA answer (Korean descriptions)."""

    # Diagnosis name of the condition.
    diagnosis : str = Field(description="해당 질병의 진단명")
    # Cause of the condition.
    cause : str = Field(description="질병의 원인")
    # Symptoms / chief complaint.
    complaint : str = Field(description="질병의 증상")
    # Treatment for the condition.
    treatment : str = Field(description="질병의 치료법")
# Parser that coerces LLM output into a Summary instance.
parser = PydanticOutputParser(pydantic_object=Summary)

# QA template; the original bound this string to `prompt` and immediately
# overwrote it — a distinct name avoids the shadowing.
qa_template = """
당신은 AI 어시스턴트입니다. 아래 지시사항대로 답변 하세요.
QUESTION:
{question}
FORMAT
{format_instructions}
"""

prompt = PromptTemplate(
    input_variables=['question'],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=qa_template,
)

# question text -> prompt -> LLM -> Summary instance
chain = prompt | llm | parser
# NOTE(review): text_json is only assigned by code that appears later in this
# file (scraped notebook cells out of order); executed top-to-bottom this line
# raises NameError unless the JSON record was parsed first — confirm cell order.
chain.invoke(" 정답 --> ".join([text_json['question'], text_json['answer']]))
# Exploratory listing: print every file path under the extracted data tree.
for dirpath, _dirnames, filenames in os.walk("./data2"):
    for fname in filenames:
        print(f"{dirpath}/{fname}")
from tqdm import tqdm

# Build one big "question 정답 --> answer" corpus, one record per line,
# from every JSON file under ./data2.
total_text = ""
for roots, dirs, files in os.walk("./data2"):
    for file in tqdm(files):
        with open(f"{roots}/{file}", 'r', encoding='utf-8-sig') as f:
            text = f.read()
        text_json = json.loads(text)
        # BUG FIX: the original joined [question, answer, "\n"], which injected a
        # stray " 정답 --> " separator before every newline; append "\n" instead.
        total_text += " 정답 --> ".join([text_json['question'], text_json['answer']]) + "\n"
from langchain_text_splitters import RecursiveCharacterTextSplitter

# The original built a chunk_overlap=0 splitter and immediately overwrote it;
# only the chunk_overlap=50 configuration was ever used.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
result = text_splitter.split_text(total_text)
import numpy as np
from numpy.linalg import norm

def cosine_sim(a, b):
    """Cosine similarity between vector `a` and vector(s) `b`.

    `a` is a 1-D vector; `b` may be a single 1-D vector (returns a scalar) or a
    2-D array of row vectors (returns one similarity per row).

    BUG FIX: the original normalized with ``norm(b, axis=0)``, which computes
    per-*column* norms and breaks (or silently mis-scales) when `b` is a 2-D
    batch of row vectors. ``axis=-1`` gives per-row norms and is identical to
    the old behavior for 1-D inputs.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    return np.dot(a, b.T) / (norm(b, axis=-1) * norm(a))
# Cosine similarity between the two name embeddings.
# BUG FIX: the blog-chrome token "반응형" was fused onto this line by the
# scrape, making it a SyntaxError; it is not part of the code.
cosine_sim(Trump, Elon)
'프로젝트 하면서' 카테고리의 다른 글
| 글 제목 | 작성일 |
|---|---|
| LangChain 실습(1) (0) | 2025.12.26 |
| n8n Gmail에서 원하는 첨부파일 다운로드 (0) | 2025.05.07 |
| 검색을 사용하려면 최신 버전의 Microsoft IME를 사용하세요/윈도우 이모지 검색 활성화 / (0) | 2025.03.21 |
| jbk1.6에서 drm암호화 엑셀파일 복호화 하기 (0) | 2022.05.02 |
| [Windows] 숨김폴더 확인하기 (0) | 2021.07.28 |
댓글