Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions app/routers/cv_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ async def upload_cv_to_pool(
"is_existing": True
}

compressed_text = compress_cv_data(cv_data, cv_data.get("skills", []))
compressed_text = compress_cv_data(raw_text, cv_data, cv_data.get("skills", []))
cv_vector = get_embedding(compressed_text)

pool_record = {
Expand Down Expand Up @@ -133,7 +133,9 @@ async def map_cv_to_job(
"years_of_experience": cv_record["candidate_info"].get("years_of_experience", 0),
"skill_experience": cv_record["candidate_info"].get("skill_experience", {}),
"education_level": cv_record["candidate_info"].get("education_level", "Không đề cập"),
"cv_vector": cv_record.get("cv_vector", [])
"cv_vector": cv_record.get("cv_vector", []),
"word_count": len((cv_record.get("raw_text", "") or "").split()),
"raw_text": cv_record.get("raw_text", "")
}

scoring_result = score_cv(cv_data_for_scoring, jd_data)
Expand Down
4 changes: 3 additions & 1 deletion app/routers/job_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ async def rescore_all_applications_for_job(job_id: str, jd_data: dict, current_h
"years_of_experience": cv_record["candidate_info"].get("years_of_experience", 0),
"skill_experience": cv_record["candidate_info"].get("skill_experience", {}),
"education_level": cv_record["candidate_info"].get("education_level", "Không đề cập"),
"cv_vector": cv_record.get("cv_vector", [])
"cv_vector": cv_record.get("cv_vector", []),
"word_count": len((cv_record.get("raw_text", "") or "").split()),
"raw_text": cv_record.get("raw_text", "")
}

new_score = score_cv(cv_data_for_scoring, jd_data)
Expand Down
37 changes: 33 additions & 4 deletions app/services/nlp_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logging.getLogger("pdfminer").setLevel(logging.ERROR)

def load_skills(file_path: str) -> Dict[str, List[str]]:
skill_map = {}
Expand Down Expand Up @@ -67,7 +68,7 @@ def extract_skills(text: str) -> List[str]:
found.add(main)
break

return list(found)
return sorted(list(found))

def extract_basic_info(text: str) -> Dict:
email = re.search(r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b", text)
Expand Down Expand Up @@ -95,6 +96,9 @@ def extract_social_links(text: str) -> dict:
for match in matches:
url = match.group(0).rstrip('.,;)]')

if url.endswith(('.js', '.ts', '.php', '.py', '.html', '.css', '.cpp')):
continue

if '@' in url and not url.startswith('http'):
continue

Expand Down Expand Up @@ -223,7 +227,7 @@ def get_normalized_skill(raw_skill: str) -> str:
return root
return raw_lower

def calculate_skill_score(cv_skills: set, cv_skill_exp: dict, jd_required: list, jd_preferred: list):
def calculate_skill_score(cv_skills: set, cv_skill_exp: dict, cv_yoe: float, jd_required: list, jd_preferred: list):
score = 0.0
total_weight = sum(s.get('weight', 1.0) for s in jd_required) + sum(s.get('weight', 0.5) for s in jd_preferred)

Expand All @@ -243,6 +247,9 @@ def evaluate_skill(skill_dict, default_weight):
if norm_name in cv_skills:
cv_years = cv_skill_exp.get(norm_name, 0.0)

if cv_years == 0.0 and cv_yoe > 0:
cv_years = cv_yoe * 0.5

if req_years > 0:
if cv_years >= req_years:
bonus = min((cv_years - req_years) * 0.1, 0.2) * weight
Expand Down Expand Up @@ -333,7 +340,7 @@ def score_cv(cv_data: dict, jd_data: dict) -> dict:
jd_vector = jd_data.get("jd_vector", [])
cv_vector = cv_data.get("cv_vector", [])

skill_score, matched_skills, missing_required_skills = calculate_skill_score(cv_skills, cv_skill_exp, jd_required_skills, jd_preferred_skills)
skill_score, matched_skills, missing_required_skills = calculate_skill_score(cv_skills, cv_skill_exp, cv_yoe, jd_required_skills, jd_preferred_skills)
experience_score = calculate_experience_score(cv_yoe, jd_min_yoe)
education_score = calculate_education_score(cv_edu, jd_min_edu)

Expand All @@ -353,13 +360,35 @@ def score_cv(cv_data: dict, jd_data: dict) -> dict:

total_score = min(100.0, total_score)

raw_text = cv_data.get("raw_text", "").lower()
eng_words = [" the ", " and ", " in ", " to ", " of ", " for ", " with "]
vie_words = [" và ", " của ", " trong ", " cho ", " với ", " tại ", " là ", " các ", " người "]

eng_count = sum(raw_text.count(w) for w in eng_words)
vie_count = sum(raw_text.count(w) for w in vie_words)
is_english = eng_count > vie_count

threshold_severe = 100 if is_english else 200
threshold_light = 150 if is_english else 300

word_count = cv_data.get("word_count")
penalty_score = 0.0

if word_count < threshold_severe:
penalty_score = 20.0
elif word_count < threshold_light:
penalty_score = 10.0

total_score = max(0.0, total_score - penalty_score)

return {
"total_score": round(total_score, 2),
"score_breakdown": {
"skills_score": skill_score,
"experience_score": experience_score,
"education_score": education_score,
"nlp_score": nlp_score
"nlp_score": nlp_score,
"penalty_score": penalty_score
},
"matched_skills": matched_skills,
"missing_required_skills": missing_required_skills
Expand Down
24 changes: 20 additions & 4 deletions app/services/vector_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,37 @@
load_dotenv()
COLAB_API_URL = os.getenv("COLAB_API_URL")

def compress_cv_data(raw_text: str, candidate_info: dict, extracted_skills: list) -> str:
    """Build the text representation of a CV that is sent for embedding.

    The result is a short structured summary (education level, years of
    experience, skills) followed by the full raw CV text.

    Args:
        raw_text: Full text extracted from the CV. ``None`` is treated as an
            empty string so the literal "None" never reaches the output.
        candidate_info: Parsed candidate fields; ``education_level`` and
            ``years_of_experience`` are read from it. Missing or ``None``
            values fall back to the defaults below.
        extracted_skills: Skill names found in the CV. Empty/``None`` entries
            are dropped; an empty or ``None`` list yields the fallback phrase.

    Returns:
        The combined summary + raw-text string.
    """
    info = candidate_info or {}

    # `or` (rather than a .get default) also covers keys stored as None/"".
    edu = info.get("education_level") or "Không có thông tin học vấn"

    # Explicit None check keeps a legitimate 0 / 0.0 value untouched.
    yoe = info.get("years_of_experience")
    if yoe is None:
        yoe = 0

    # str.join raises TypeError on non-string items, so normalise first.
    skills = [str(skill) for skill in (extracted_skills or []) if skill]
    skills_str = ", ".join(skills) if skills else "Không có kỹ năng rõ ràng"

    body = raw_text or ""

    return (
        f"Thông tin tóm tắt: Trình độ {edu}, {yoe} năm kinh nghiệm. Kỹ năng: {skills_str}.\n"
        f"Chi tiết Hồ sơ:\n{body}"
    )

def compress_jd_data(jd_data: dict) -> str:
    """Build the text representation of a job description.

    The result lists the job title, the minimum requirements (education
    level, years of experience, required skill names) and then the free-text
    description, detailed requirements and benefits sections.
    NOTE(review): presumably this string is what gets embedded as the JD
    vector; the caller is not visible here, so confirm.

    Args:
        jd_data: Job-description record. Reads ``title``, ``min_yoe``,
            ``education.min_level``, ``required_skills[*].name``,
            ``description``, ``requirements`` and ``benefits``. Any of these
            may be missing or ``None``.

    Returns:
        The combined multi-line context string.
    """
    title = jd_data.get("title") or ""

    # Explicit None check keeps a legitimate 0 value untouched.
    yoe = jd_data.get("min_yoe")
    if yoe is None:
        yoe = 0

    # "education" may be present but None; `.get` on None would raise.
    education = jd_data.get("education") or {}
    edu = education.get("min_level") or "Không yêu cầu"

    # Skip malformed entries: a missing name would put None into str.join,
    # which raises TypeError.
    req_skills = [
        str(skill["name"])
        for skill in (jd_data.get("required_skills") or [])
        if isinstance(skill, dict) and skill.get("name")
    ]
    skills_str = ", ".join(req_skills) if req_skills else "Không yêu cầu kỹ năng cụ thể"

    # Fall back to "" so a stored None is not rendered as the text "None".
    desc = jd_data.get("description") or ""
    reqs = jd_data.get("requirements") or ""
    benefits = jd_data.get("benefits") or ""

    return (
        f"Vị trí tuyển dụng: {title}\n"
        f"Yêu cầu tối thiểu: Trình độ {edu}, tối thiểu {yoe} năm kinh nghiệm. Kỹ năng: {skills_str}.\n"
        f"Mô tả công việc:\n{desc}\n"
        f"Yêu cầu chi tiết:\n{reqs}\n"
        f"Quyền lợi:\n{benefits}"
    )

def get_embedding(text: str) -> list:
if not COLAB_API_URL:
Expand Down
Loading