需要根据摄像头来判断视频流中人物是否在讲话。主要参考这个
iOS中Vision 的 faceObservation.landmarks.outerLips?.normalizedPoints数组对应的坐标点如下
1. 嘴巴外轮廓坐标点对应图形
outerLips?.normalizedPoints
截屏2024-03-22 11.23.34.png
2. 嘴巴内轮廓点图
faceObservation.landmarks.innerLips?.normalizedPoints
截屏2024-03-22 11.37.21.png
原理:
使用外轮廓数据,计算嘴巴的开合,这里是计算3和10点的嘴巴高度,13号和7号的嘴巴宽度,比率是w/h,设置闭上嘴巴的阈值,张开嘴巴比率越小,闭上嘴巴比率越大。这里是活体检测嘴巴开合。
说话检测:在一个固定帧数的滑动窗口内,用移动平均统计“张开嘴巴”的帧数;当该次数超过设定阈值时,即判定为正在说话。
3. html 对应的点位表示代码
<!DOCTYPE html>
<html>
<head>
<!-- Scatter plot of the 14 normalized outer-lip landmark points, labeled by index. -->
<title>Plot Points with Index Labels</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
</head>
<body>
<div id="plot"></div>
<script>
// Normalized lip-contour coordinates (indices 0-13).
const xs = [0.3206057548522949, 0.36859285831451416, 0.4291403591632843, 0.48517897725105286, 0.5408955216407776, 0.6040195226669312, 0.6594140529632568, 0.6974172592163086, 0.6471757292747498, 0.5750340223312378, 0.4925863742828369, 0.41400346159935, 0.3415397107601166, 0.2932305634021759];
const ys = [0.27181628346443176, 0.3185584545135498, 0.3487337827682495, 0.3392670154571533, 0.35050877928733826, 0.3238835036754608, 0.28128305077552795, 0.22625747323036194, 0.16709017753601074, 0.12685641646385193, 0.1108812466263771, 0.12153135985136032, 0.15762341022491455, 0.2138323336839676];
// One scatter trace; each marker is annotated with its array index.
const trace = {
  x: xs,
  y: ys,
  mode: 'markers+text',
  type: 'scatter',
  text: xs.map((_, i) => String(i)),
  textposition: 'top center',
  marker: { size: 10 }
};
Plotly.newPlot('plot', [trace], {
  title: 'Plot Points with Index Labels',
  xaxis: { title: 'X Axis' },
  yaxis: { title: 'Y Axis' }
});
</script>
</body>
</html>
检测工具
//
// LipTracker.swift
// VisionFaceTrack
// Created by JoyTim on 2024/3/22
// Copyright © 2024 Apple. All rights reserved.
//
import Foundation
/// Tracks mouth open/closed state over a sliding window of lip-landmark
/// frames to detect whether a person is likely speaking.
///
/// Feed each frame's outer-lip `normalizedPoints` (14 points, indices 0-13)
/// via `addPoints(_:)`. The first valid frame calibrates the closed-mouth
/// width/height ratio (`threshold`); later frames whose ratio is *smaller*
/// (an open mouth is taller, so width/height shrinks) count as "open".
class LipTracker {
    /// Rolling record of whether the mouth was judged open on recent frames.
    private var lipOpenings: [Bool] = []
    /// Moving-average window size (number of recent frames considered).
    private var movingAverageWindowSize: Int = 10
    /// Recent landmark frames, bounded to the window to avoid unbounded growth.
    private var lipPointsSequence: [[CGPoint]] = []
    /// Closed-mouth width/height baseline. 0 means "not calibrated yet";
    /// the first valid frame's ratio becomes the baseline.
    var threshold = 0.0

    /// Creates a tracker.
    /// - Parameter windowSize: moving-average window in frames
    ///   (defaults to 10, matching the original behavior; clamped to >= 1).
    init(windowSize: Int = 10) {
        movingAverageWindowSize = max(1, windowSize)
    }

    /// Appends one frame of outer-lip landmarks and updates the statistics.
    /// Frames with fewer than 14 points are ignored — indices 3, 7, 10 and 13
    /// are read below, so shorter arrays would crash on subscript.
    func addPoints(_ points: [CGPoint]) {
        guard points.count >= 14 else { return }
        lipPointsSequence.append(points)
        // Keep memory bounded: only frames inside the window are retained.
        if lipPointsSequence.count > movingAverageWindowSize {
            lipPointsSequence.removeFirst()
        }
        calculateLipOpening()
    }

    /// Computes the latest frame's width/height ratio and records whether
    /// the mouth counts as open (ratio below the closed-mouth baseline).
    private func calculateLipOpening() {
        guard let lastPoints = lipPointsSequence.last else { return }
        // abs() guards against flipped coordinate systems producing negative spans.
        let lipHeight = abs(lastPoints[3].y - lastPoints[10].y) // mouth height (points 3 & 10)
        let lipWidth = abs(lastPoints[7].x - lastPoints[13].x)  // mouth width (points 7 & 13)
        // A degenerate frame with zero height would divide by zero — skip it.
        guard lipHeight > 0 else { return }
        let lipRatio = Double(lipWidth) / Double(lipHeight)
        var isLipOpen = false
        if threshold == 0 {
            // First valid frame calibrates the closed-mouth baseline;
            // that frame itself is recorded as "closed".
            threshold = lipRatio
        } else {
            isLipOpen = lipRatio < threshold
        }
        lipOpenings.append(isLipOpen)
        // Keep the boolean history inside the moving-average window.
        if lipOpenings.count > movingAverageWindowSize {
            lipOpenings.removeFirst()
        }
    }

    /// Number of "open" frames inside the current window (moving average count).
    func countLipOpenings() -> Int {
        return lipOpenings.filter { $0 }.count
    }

    /// Resets all accumulated state, including the calibrated threshold,
    /// so the tracker can be reused for a fresh session.
    func clearData() {
        lipPointsSequence.removeAll()
        lipOpenings.removeAll()
        threshold = 0.0
    }
}
调用示例
// Feed each frame's outer-lip landmarks into the tracker; `normalizedPoints`
// is nil when no lip contour was detected on this frame.
if let dd = landmarks.outerLips?.normalizedPoints {
lipTracker.addPoints(dd)
// More than 6 "open mouth" frames inside the tracker's 10-frame moving
// window is treated as active speech.
if lipTracker.countLipOpenings() > 6 {
// UI updates must happen on the main thread.
DispatchQueue.main.async {
self.lipLabel.text = "正在讲话"
print("==========正在讲话====")
}
}else{
// Below the speech threshold: show the "not detected" label.
DispatchQueue.main.async {
self.lipLabel.text = "未检测到"
}
}
}








网友评论