You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

50 lines
1.6 KiB
Java

package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.supervision.pdfqaserver.dto.TextTerm;
import com.supervision.pdfqaserver.service.TextToSegmentService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link TextToSegmentService} implementation backed by HanLP.
 *
 * <p>Provides word segmentation of free text and runtime insertion of words
 * into HanLP's custom dictionary.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class TextToSegmentServiceImpl implements TextToSegmentService {

    /**
     * Segments {@code text} with a HanLP segmenter that has organization,
     * place and number-quantifier recognition enabled, and converts every
     * resulting term into a {@link TextTerm}.
     *
     * @param text the text to segment; may be {@code null} or empty
     * @return a new mutable list with one {@link TextTerm} per HanLP term, in
     *         segmentation order; an empty list when {@code text} is
     *         {@code null}/empty or HanLP yields no terms
     */
    @Override
    public List<TextTerm> segmentText(String text) {
        if (StrUtil.isEmpty(text)) {
            return new ArrayList<>();
        }
        // A segmenter is built per call, exactly as before, so no configuration
        // state is shared between invocations.
        Segment segment = HanLP.newSegment()
                .enableOrganizationRecognize(true)
                .enablePlaceRecognize(true)
                .enableNumberQuantifierRecognize(true);
        List<Term> seg = segment.seg(text);
        if (CollUtil.isEmpty(seg)) {
            return new ArrayList<>();
        }
        List<TextTerm> terms = new ArrayList<>(seg.size());
        for (Term term : seg) {
            TextTerm textTerm = new TextTerm();
            textTerm.setWord(term.word);
            // Term.nature is a plain public field and is not guaranteed to be
            // set; guard it so one untagged term cannot fail the whole request.
            textTerm.setLabel(term.nature == null ? null : term.nature.toString());
            terms.add(textTerm);
        }
        return terms;
    }

    /**
     * Inserts {@code word} into HanLP's custom dictionary, passing
     * {@code "<label> <frequency>"} as the nature-with-frequency specification.
     *
     * <p>The call is best-effort: a blank {@code word} or {@code label} is
     * skipped, and an insertion that HanLP rejects is only logged. No exception
     * is raised by this method itself in either case.
     *
     * @param word      the word to register; skipped when blank
     * @param label     the nature (part-of-speech) label for the word; skipped
     *                  when blank, since concatenating it would otherwise
     *                  produce a malformed specification such as
     *                  {@code "null 5"}
     * @param frequency the frequency appended after the label
     */
    @Override
    public void addDict(String word, String label, int frequency) {
        if (StrUtil.isBlank(word) || StrUtil.isBlank(label)) {
            log.warn("addDict skipped, word and label must not be blank: word={}, label={}, frequency={}",
                    word, label, frequency);
            return;
        }
        // CustomDictionary.insert reports failure through its boolean result;
        // surface it in the log instead of dropping it silently.
        boolean inserted = CustomDictionary.insert(word, label + " " + frequency);
        if (!inserted) {
            log.warn("addDict failed, HanLP rejected the entry: word={}, label={}, frequency={}",
                    word, label, frequency);
        }
    }
}