
from elasticsearch import Elasticsearch
import requests
import tiktoken
# Connect to the local Elasticsearch node.
# The URL must be a bare "scheme://host:port" string; the original value was
# wrapped in angle brackets (markdown autolink residue), which the client
# cannot parse into a scheme and host.
es = Elasticsearch("http://localhost:9200", verify_certs=False)
# Download the FAQ documents as JSON.
# The URL is a plain string (no surrounding angle brackets); "?raw=1" asks
# GitHub for the raw file rather than the HTML page.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
resp = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
resp.raise_for_status()
documents_raw = resp.json()
# Flatten the per-course structure into one list of FAQ entries, tagging
# each entry with the name of the course it came from.
documents = []
for course in documents_raw:
    course_name = course['course']
    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)
# Delete any existing index so every run starts from a clean state.
index_name = "faq"
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)

# Create a fresh index with an explicit mapping: "course" is a keyword
# field (used for exact-match term filtering), while "question" and "text"
# are full-text fields.
es.indices.create(
    index=index_name,
    mappings={
        "properties": {
            "course": {"type": "keyword"},
            "question": {"type": "text"},
            "text": {"type": "text"},
        }
    },
)
# Index every document, using its position in the list as the document id.
for i, doc in enumerate(documents):
    es.index(index=index_name, id=i, document=doc)

# Newly indexed documents only become searchable after a refresh. Force one
# here so the searches that follow see the full data set instead of an
# empty or partially populated index.
es.indices.refresh(index=index_name)
# Q3: search across "question" and "text" and report the score of the top hit.
# The "question" field is boosted by a factor of 4 relative to "text".
q3_query = {
    "query": {
        "multi_match": {
            "query": "How do execute a command on a Kubernetes pod?",
            "fields": ["question^4", "text"],
            "type": "best_fields",
        },
    },
    "size": 1,
}
# Unpacking the request dict passes both "query" and "size" as keyword arguments.
q3_res = es.search(index=index_name, **q3_query)
q3_top_hit = q3_res['hits']['hits'][0]
q3_score = q3_top_hit['_score']
print("Q3 答案:最高分數是", round(q3_score, 2))
# Q4: same kind of multi_match search, restricted to a single course with a
# term filter, returning the top three hits.
q4_query = {
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": "How do copy a file to a Docker container?",
                    "fields": ["question^4", "text"],
                    "type": "best_fields",
                }
            },
            "filter": {
                "term": {"course": "machine-learning-zoomcamp"}
            },
        }
    },
    "size": 3,
}
q4_res = es.search(index=index_name, query=q4_query["query"], size=q4_query["size"])
print("Q4 答案:前三筆問題分別是:")
# Number the results starting from 1 for display.
for i, hit in enumerate(q4_res['hits']['hits'], 1):
    print(f"{i}. {hit['_source']['question']}")
# Q5: build the LLM prompt from the Q4 search hits.
# Each hit is rendered as a "Q: ... / A: ..." entry.
context_template = """
Q: {question}
A: {text}
""".strip()
context_entries = []
for hit in q4_res['hits']['hits']:
    q = hit['_source']['question']
    a = hit['_source']['text']
    context_entries.append(context_template.format(question=q, text=a))
# Separate entries with a real blank line. The original used "\\n\\n", which
# inserts literal backslash-n characters and distorts the prompt length.
context = "\n\n".join(context_entries)
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.
QUESTION: {question}
CONTEXT:
{context}
""".strip()
final_prompt = prompt_template.format(
    question="How do copy a file to a Docker container?",
    context=context,
)
print("Q5 答案:prompt 長度是", len(final_prompt))
# Q6: count the tokens in the final prompt using the tokenizer that
# tiktoken associates with the "gpt-4o" model.
encoding = tiktoken.encoding_for_model("gpt-4o")
tokens = encoding.encode(final_prompt)
token_count = len(tokens)
print("Q6 答案:token 數量是", token_count)