You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
1.6 KiB
Java
50 lines
1.6 KiB
Java
3 months ago
|
package com.supervision.pdfqaserver.service.impl;
|
||
|
|
||
|
import cn.hutool.core.collection.CollUtil;
|
||
|
import cn.hutool.core.util.StrUtil;
|
||
|
import com.hankcs.hanlp.HanLP;
|
||
|
import com.hankcs.hanlp.dictionary.CustomDictionary;
|
||
|
import com.hankcs.hanlp.seg.Segment;
|
||
|
import com.hankcs.hanlp.seg.common.Term;
|
||
|
import com.supervision.pdfqaserver.dto.TextTerm;
|
||
|
import com.supervision.pdfqaserver.service.TextToSegmentService;
|
||
|
import lombok.RequiredArgsConstructor;
|
||
|
import lombok.extern.slf4j.Slf4j;
|
||
|
import org.springframework.stereotype.Service;
|
||
|
|
||
|
import java.util.ArrayList;
|
||
|
import java.util.List;
|
||
|
|
||
|
@Slf4j
|
||
|
@Service
|
||
|
@RequiredArgsConstructor
|
||
|
public class TextToSegmentServiceImpl implements TextToSegmentService {
|
||
|
@Override
|
||
|
public List<TextTerm> segmentText(String text) {
|
||
|
if (StrUtil.isEmpty(text)){
|
||
|
return new ArrayList<>();
|
||
|
}
|
||
|
Segment segment = HanLP.newSegment()
|
||
|
.enableOrganizationRecognize(true)
|
||
|
.enablePlaceRecognize(true)
|
||
|
.enableNumberQuantifierRecognize(true);
|
||
|
|
||
|
List<Term> seg = segment.seg(text);
|
||
|
if (CollUtil.isEmpty(seg)){
|
||
|
return new ArrayList<>();
|
||
|
}
|
||
|
List<TextTerm> terms = new ArrayList<>();
|
||
|
for (Term term : seg) {
|
||
|
TextTerm textTerm = new TextTerm();
|
||
|
textTerm.setWord(term.word);
|
||
|
textTerm.setLabel(term.nature.toString());
|
||
|
terms.add(textTerm);
|
||
|
}
|
||
|
return terms;
|
||
|
}
|
||
|
@Override
|
||
|
public void addDict(String word, String label,int frequency) {
|
||
|
CustomDictionary.insert(word, label + " " + frequency);
|
||
|
}
|
||
|
}
|