improve musetalk lipsync and speed
parent
592312ab8c
commit
da9ffa9521
@ -1,100 +1,125 @@
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import cv2
|
||||
from face_parsing import FaceParsing
|
||||
|
||||
fp = FaceParsing()
|
||||
|
||||
def get_crop_box(box, expand):
|
||||
x, y, x1, y1 = box
|
||||
x_c, y_c = (x+x1)//2, (y+y1)//2
|
||||
w, h = x1-x, y1-y
|
||||
s = int(max(w, h)//2*expand)
|
||||
crop_box = [x_c-s, y_c-s, x_c+s, y_c+s]
|
||||
return crop_box, s
|
||||
|
||||
def face_seg(image):
|
||||
seg_image = fp(image)
|
||||
if seg_image is None:
|
||||
print("error, no person_segment")
|
||||
return None
|
||||
|
||||
seg_image = seg_image.resize(image.size)
|
||||
return seg_image
|
||||
|
||||
def get_image(image,face,face_box,upper_boundary_ratio = 0.5,expand=1.2):
|
||||
#print(image.shape)
|
||||
#print(face.shape)
|
||||
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
face = Image.fromarray(face[:,:,::-1])
|
||||
|
||||
x, y, x1, y1 = face_box
|
||||
#print(x1-x,y1-y)
|
||||
crop_box, s = get_crop_box(face_box, expand)
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
face_position = (x, y)
|
||||
|
||||
face_large = body.crop(crop_box)
|
||||
ori_shape = face_large.size
|
||||
|
||||
mask_image = face_seg(face_large)
|
||||
mask_small = mask_image.crop((x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
mask_image = Image.new('L', ori_shape, 0)
|
||||
mask_image.paste(mask_small, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
|
||||
# keep upper_boundary_ratio of talking area
|
||||
width, height = mask_image.size
|
||||
top_boundary = int(height * upper_boundary_ratio)
|
||||
modified_mask_image = Image.new('L', ori_shape, 0)
|
||||
modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))
|
||||
|
||||
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
|
||||
mask_image = Image.fromarray(mask_array)
|
||||
|
||||
face_large.paste(face, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
body.paste(face_large, crop_box[:2], mask_image)
|
||||
body = np.array(body)
|
||||
return body[:,:,::-1]
|
||||
|
||||
def get_image_prepare_material(image,face_box,upper_boundary_ratio = 0.5,expand=1.2):
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
|
||||
x, y, x1, y1 = face_box
|
||||
#print(x1-x,y1-y)
|
||||
crop_box, s = get_crop_box(face_box, expand)
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
|
||||
face_large = body.crop(crop_box)
|
||||
ori_shape = face_large.size
|
||||
|
||||
mask_image = face_seg(face_large)
|
||||
mask_small = mask_image.crop((x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
mask_image = Image.new('L', ori_shape, 0)
|
||||
mask_image.paste(mask_small, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
|
||||
# keep upper_boundary_ratio of talking area
|
||||
width, height = mask_image.size
|
||||
top_boundary = int(height * upper_boundary_ratio)
|
||||
modified_mask_image = Image.new('L', ori_shape, 0)
|
||||
modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))
|
||||
|
||||
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
|
||||
return mask_array,crop_box
|
||||
|
||||
def get_image_blending(image,face,face_box,mask_array,crop_box):
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
face = Image.fromarray(face[:,:,::-1])
|
||||
|
||||
x, y, x1, y1 = face_box
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
face_large = body.crop(crop_box)
|
||||
|
||||
mask_image = Image.fromarray(mask_array)
|
||||
mask_image = mask_image.convert("L")
|
||||
face_large.paste(face, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
body.paste(face_large, crop_box[:2], mask_image)
|
||||
body = np.array(body)
|
||||
return body[:,:,::-1]
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import cv2
|
||||
from face_parsing import FaceParsing
|
||||
import copy
|
||||
|
||||
fp = FaceParsing()
|
||||
|
||||
def get_crop_box(box, expand):
|
||||
x, y, x1, y1 = box
|
||||
x_c, y_c = (x+x1)//2, (y+y1)//2
|
||||
w, h = x1-x, y1-y
|
||||
s = int(max(w, h)//2*expand)
|
||||
crop_box = [x_c-s, y_c-s, x_c+s, y_c+s]
|
||||
return crop_box, s
|
||||
|
||||
def face_seg(image):
|
||||
seg_image = fp(image)
|
||||
if seg_image is None:
|
||||
print("error, no person_segment")
|
||||
return None
|
||||
|
||||
seg_image = seg_image.resize(image.size)
|
||||
return seg_image
|
||||
|
||||
def get_image(image,face,face_box,upper_boundary_ratio = 0.5,expand=1.2):
|
||||
#print(image.shape)
|
||||
#print(face.shape)
|
||||
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
face = Image.fromarray(face[:,:,::-1])
|
||||
|
||||
x, y, x1, y1 = face_box
|
||||
#print(x1-x,y1-y)
|
||||
crop_box, s = get_crop_box(face_box, expand)
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
face_position = (x, y)
|
||||
|
||||
face_large = body.crop(crop_box)
|
||||
ori_shape = face_large.size
|
||||
|
||||
mask_image = face_seg(face_large)
|
||||
mask_small = mask_image.crop((x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
mask_image = Image.new('L', ori_shape, 0)
|
||||
mask_image.paste(mask_small, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
|
||||
# keep upper_boundary_ratio of talking area
|
||||
width, height = mask_image.size
|
||||
top_boundary = int(height * upper_boundary_ratio)
|
||||
modified_mask_image = Image.new('L', ori_shape, 0)
|
||||
modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))
|
||||
|
||||
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
|
||||
mask_image = Image.fromarray(mask_array)
|
||||
|
||||
face_large.paste(face, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
body.paste(face_large, crop_box[:2], mask_image)
|
||||
body = np.array(body)
|
||||
return body[:,:,::-1]
|
||||
|
||||
def get_image_prepare_material(image,face_box,upper_boundary_ratio = 0.5,expand=1.2):
|
||||
body = Image.fromarray(image[:,:,::-1])
|
||||
|
||||
x, y, x1, y1 = face_box
|
||||
#print(x1-x,y1-y)
|
||||
crop_box, s = get_crop_box(face_box, expand)
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
|
||||
face_large = body.crop(crop_box)
|
||||
ori_shape = face_large.size
|
||||
|
||||
mask_image = face_seg(face_large)
|
||||
mask_small = mask_image.crop((x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
mask_image = Image.new('L', ori_shape, 0)
|
||||
mask_image.paste(mask_small, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
|
||||
# keep upper_boundary_ratio of talking area
|
||||
width, height = mask_image.size
|
||||
top_boundary = int(height * upper_boundary_ratio)
|
||||
modified_mask_image = Image.new('L', ori_shape, 0)
|
||||
modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))
|
||||
|
||||
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
|
||||
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
|
||||
return mask_array,crop_box
|
||||
|
||||
# def get_image_blending(image,face,face_box,mask_array,crop_box):
|
||||
# body = Image.fromarray(image[:,:,::-1])
|
||||
# face = Image.fromarray(face[:,:,::-1])
|
||||
|
||||
# x, y, x1, y1 = face_box
|
||||
# x_s, y_s, x_e, y_e = crop_box
|
||||
# face_large = body.crop(crop_box)
|
||||
|
||||
# mask_image = Image.fromarray(mask_array)
|
||||
# mask_image = mask_image.convert("L")
|
||||
# face_large.paste(face, (x-x_s, y-y_s, x1-x_s, y1-y_s))
|
||||
# body.paste(face_large, crop_box[:2], mask_image)
|
||||
# body = np.array(body)
|
||||
# return body[:,:,::-1]
|
||||
|
||||
def get_image_blending(image,face,face_box,mask_array,crop_box):
|
||||
body = image
|
||||
x, y, x1, y1 = face_box
|
||||
x_s, y_s, x_e, y_e = crop_box
|
||||
face_large = copy.deepcopy(body[y_s:y_e, x_s:x_e])
|
||||
face_large[y-y_s:y1-y_s, x-x_s:x1-x_s]=face
|
||||
|
||||
mask_image = cv2.cvtColor(mask_array,cv2.COLOR_BGR2GRAY)
|
||||
mask_image = (mask_image/255).astype(np.float32)
|
||||
|
||||
# mask_not = cv2.bitwise_not(mask_array)
|
||||
# prospect_tmp = cv2.bitwise_and(face_large, face_large, mask=mask_array)
|
||||
# background_img = body[y_s:y_e, x_s:x_e]
|
||||
# background_img = cv2.bitwise_and(background_img, background_img, mask=mask_not)
|
||||
# body[y_s:y_e, x_s:x_e] = prospect_tmp + background_img
|
||||
|
||||
#print(mask_image.shape)
|
||||
#print(cv2.minMaxLoc(mask_image))
|
||||
|
||||
body[y_s:y_e, x_s:x_e] = cv2.blendLinear(face_large,body[y_s:y_e, x_s:x_e],mask_image,1-mask_image)
|
||||
|
||||
#body.paste(face_large, crop_box[:2], mask_image)
|
||||
return body
|
Loading…
Reference in New Issue