From 55220e2192f702de099a7fc386db9d36acaddf1b Mon Sep 17 00:00:00 2001
From: xueqingkun
Date: Tue, 13 May 2025 13:57:02 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=95=B0=E6=8D=AE=E5=BA=93sq?=
 =?UTF-8?q?l=E5=A4=87=E4=BB=BD=20=E6=B7=BB=E5=8A=A0=E7=B3=BB=E7=BB=9F?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E9=85=8D=E7=BD=AE=20=E4=BF=AE=E6=94=B9docker?=
 =?UTF-8?q?=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (commentary only; text between the `---` line and the first
file diff is not part of the commit message or of the applied change):

* What the patch does: adds the initial PostgreSQL schema (sql/v1.0.0.sql),
  sets the Spring server port and servlet context-path, and adjusts the
  nginx reverse-proxy rules for the /pdf-qa-server/ prefix.
* nginx: http.conf and https.conf both keep `location /pdf-qa-server/` and
  forward to `http://pdf-qa-server/pdf-qa-server/`, so the upstream request
  carries the `/pdf-qa-server` context-path configured in application.yml.
* sql/v1.0.0.sql: `pdf_info.id` is declared `primary key`; the column
  comments on pdf_analysis_output.pdf_id and document_truncation.document_id
  already describe it as pdf_info's primary key / id.
* sql/v1.0.0.sql ends with a trailing newline.
* NOTE(review): the `index` blob hashes of https.conf and sql/v1.0.0.sql
  predate the adjustments above, and leading whitespace of context lines
  was re-created; verify against the target files (or apply with
  `git apply --ignore-whitespace`).

 docker/web/docs/conf.d/http.conf   |   2 +-
 docker/web/docs/conf.d/https.conf  |   2 +-
 sql/v1.0.0.sql                     | 240 +++++++++++++++++++++++++++++
 src/main/resources/application.yml |   4 +
 4 files changed, 246 insertions(+), 2 deletions(-)
 create mode 100644 sql/v1.0.0.sql

diff --git a/docker/web/docs/conf.d/http.conf b/docker/web/docs/conf.d/http.conf
index caba22c..42c36ba 100644
--- a/docker/web/docs/conf.d/http.conf
+++ b/docker/web/docs/conf.d/http.conf
@@ -19,7 +19,7 @@ server {
     }
 
     location /pdf-qa-server/ {
-        proxy_pass http://pdf-qa-server/;
+        proxy_pass http://pdf-qa-server/pdf-qa-server/;
         proxy_set_header Host $http_host;
         proxy_set_header X-Real-IP $remote_addr;
         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
diff --git a/docker/web/docs/conf.d/https.conf b/docker/web/docs/conf.d/https.conf
index 78510f1..aa12221 100644
--- a/docker/web/docs/conf.d/https.conf
+++ b/docker/web/docs/conf.d/https.conf
@@ -34,7 +34,7 @@ server {
         root /usr/share/nginx/html/dist;
     }
 
     location /pdf-qa-server/ {
-        proxy_pass http://pdf-qa-server/;
+        proxy_pass http://pdf-qa-server/pdf-qa-server/;
         proxy_set_header Host $http_host;
         proxy_set_header X-Real-IP $remote_addr;
diff --git a/sql/v1.0.0.sql b/sql/v1.0.0.sql
new file mode 100644
index 0000000..70744ee
--- /dev/null
+++ b/sql/v1.0.0.sql
@@ -0,0 +1,240 @@
+create table pdf_analysis_output
+(
+    id            serial
+        primary key,
+    layout_type   integer not null,
+    content       text,
+    page_no       integer not null,
+    pdf_id        integer not null,
+    table_title   varchar(100),
+    display_order integer,
+    create_time   timestamp default now()
+);
+
+comment on column pdf_analysis_output.layout_type is '0-文本 1-表格';
+
+comment on column pdf_analysis_output.content is 'pdf段落内容';
+
+comment on column pdf_analysis_output.page_no is 'pdf页码';
+
+comment on column pdf_analysis_output.pdf_id is 'pdf_info表的主键';
+
+comment on column pdf_analysis_output.table_title is '表格标题';
+
+comment on column pdf_analysis_output.display_order is '内容在pdf页面中的顺序,越小表示顺序越靠前';
+
+alter table pdf_analysis_output
+    owner to postgres;
+
+create table pdf_info
+(
+    id                    serial primary key,
+    path                  varchar(200) not null,
+    filename              varchar(100),
+    create_time           timestamp default now(),
+    process_status        integer   default 0,
+    analysis_start_time   timestamp,
+    analysis_end_time     timestamp,
+    extraction_start_time timestamp,
+    extraction_end_time   timestamp
+);
+
+comment on table pdf_info is 'pdf信息';
+
+comment on column pdf_info.path is 'pdf路径';
+
+comment on column pdf_info.filename is '文件名';
+
+comment on column pdf_info.create_time is '创建时间';
+
+comment on column pdf_info.process_status is '处理状态 0:未分析 1:正在分析 2:分析成功 3:分析失败 4:开始数据抽取 5:抽取成功 6:抽取失败';
+
+comment on column pdf_info.analysis_start_time is '开始分析时间';
+
+comment on column pdf_info.analysis_end_time is '分析结束时间';
+
+comment on column pdf_info.extraction_start_time is '开始抽取时间';
+
+comment on column pdf_info.extraction_end_time is '结束抽取时间';
+
+alter table pdf_info
+    owner to postgres;
+
+create table document_truncation
+(
+    id          varchar(255) not null
+        primary key,
+    document_id varchar(64),
+    section_id  integer,
+    layout_type varchar(64),
+    content     text,
+    create_time timestamp default CURRENT_TIMESTAMP,
+    update_time timestamp default CURRENT_TIMESTAMP,
+    title       varchar(1024)
+);
+
+comment on table document_truncation is '文档切分表';
+
+comment on column document_truncation.document_id is '文档id(pdf_info表的id)';
+
+comment on column document_truncation.section_id is '段落id pdf_analysis_output表的id';
+
+comment on column document_truncation.layout_type is '布局类型 0-文本 1-表格';
+
+comment on column document_truncation.content is '片段内容';
+
+comment on column document_truncation.create_time is '创建时间';
+
+comment on column document_truncation.update_time is '更新时间';
+
+comment on column document_truncation.title is '标题';
+
+alter table document_truncation
+    owner to postgres;
+
+create table truncation_entity_extraction
+(
+    id            varchar(255) not null
+        primary key,
+    truncation_id varchar(255),
+    entity        text,
+    name          text,
+    create_time   timestamp default CURRENT_TIMESTAMP,
+    update_time   timestamp default CURRENT_TIMESTAMP
+);
+
+comment on table truncation_entity_extraction is '片段实体抽取';
+
+comment on column truncation_entity_extraction.id is '主键';
+
+comment on column truncation_entity_extraction.truncation_id is '片段id document_truncation表的id';
+
+comment on column truncation_entity_extraction.entity is '标签(实体类型)';
+
+comment on column truncation_entity_extraction.name is '实体名';
+
+comment on column truncation_entity_extraction.create_time is '创建时间';
+
+comment on column truncation_entity_extraction.update_time is '更新时间';
+
+alter table truncation_entity_extraction
+    owner to postgres;
+
+create table truncation_er_attribute
+(
+    id               varchar(255) not null
+        primary key,
+    ter_id           varchar(64),
+    association_type varchar(255),
+    attribute        text,
+    value            text,
+    data_type        varchar(64),
+    create_time      timestamp default CURRENT_TIMESTAMP,
+    update_time      timestamp default CURRENT_TIMESTAMP
+);
+
+comment on table truncation_er_attribute is '实体表';
+
+comment on column truncation_er_attribute.ter_id is '片段实体属性表 既可以是truncation_entity_extraction表id也可以是truncation_relation_extraction表id';
+
+comment on column truncation_er_attribute.association_type is '关联类型 0:terId关联的id为实体 1:terId关联的id为关系';
+
+comment on column truncation_er_attribute.attribute is '实体名';
+
+comment on column truncation_er_attribute.value is '值';
+
+comment on column truncation_er_attribute.data_type is '数据类型 0:字符串 1:数字';
+
+comment on column truncation_er_attribute.create_time is '创建时间';
+
+comment on column truncation_er_attribute.update_time is '更新时间';
+
+alter table truncation_er_attribute
+    owner to postgres;
+
+create table truncation_relation_extraction
+(
+    id            varchar(255) not null
+        primary key,
+    truncation_id varchar(255),
+    source        text,
+    source_type   varchar(255),
+    target        text,
+    target_type   varchar(255),
+    relation      text,
+    create_time   timestamp default CURRENT_TIMESTAMP,
+    update_time   timestamp default CURRENT_TIMESTAMP
+);
+
+comment on table truncation_relation_extraction is '片段关系抽取';
+
+comment on column truncation_relation_extraction.source is '头节点';
+
+comment on column truncation_relation_extraction.source_type is '头节点类型';
+
+comment on column truncation_relation_extraction.target is '尾节点';
+
+comment on column truncation_relation_extraction.target_type is '尾节点类型';
+
+comment on column truncation_relation_extraction.relation is '关系';
+
+comment on column truncation_relation_extraction.create_time is '创建时间';
+
+comment on column truncation_relation_extraction.update_time is '更新时间';
+
+alter table truncation_relation_extraction
+    owner to postgres;
+
+create table domain_metadata
+(
+    id              varchar(255) not null
+        primary key,
+    domain_type     varchar(255),
+    source_type     varchar(64),
+    relation        varchar(64),
+    target_type     varchar(255),
+    generation_type varchar(64),
+    create_time     timestamp default CURRENT_TIMESTAMP,
+    update_time     timestamp default CURRENT_TIMESTAMP
+);
+
+comment on table domain_metadata is '领域元数据';
+
+comment on column domain_metadata.domain_type is '领域类型';
+
+comment on column domain_metadata.source_type is '头节点类型';
+
+comment on column domain_metadata.relation is '关系';
+
+comment on column domain_metadata.target_type is '尾节点类型';
+
+comment on column domain_metadata.generation_type is '数据来源:0=手动录入,1=系统自动';
+
+comment on column domain_metadata.create_time is '创建时间';
+
+comment on column domain_metadata.update_time is '更新时间';
+
+alter table domain_metadata
+    owner to postgres;
+
+create table chinese_english_words
+(
+    chinese_word varchar(1024) not null
+        primary key,
+    english_word text,
+    create_time  timestamp default CURRENT_TIMESTAMP,
+    update_time  timestamp default CURRENT_TIMESTAMP
+);
+
+comment on table chinese_english_words is '中英文对照字典';
+
+comment on column chinese_english_words.chinese_word is '中文';
+
+comment on column chinese_english_words.english_word is '英文';
+
+comment on column chinese_english_words.create_time is '创建时间';
+
+comment on column chinese_english_words.update_time is '更新时间';
+
+alter table chinese_english_words
+    owner to postgres;
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
index 6656742..775f5ee 100644
--- a/src/main/resources/application.yml
+++ b/src/main/resources/application.yml
@@ -1,3 +1,7 @@
+server:
+  port: 8080
+  servlet:
+    context-path: /pdf-qa-server
 spring:
   application:
     name: pdf-qa-server