昨天工作:
- 进行转正答辩
- mv-dssm训练结束,效果指标有小幅提升:acc_ = 0.752,auc_ = 0.754,precision_ = 0.805。
今天计划:
- 改善mv-dssm效果
- 生成inference数据,对最新的mvdssm模型进行效果测试。
- 增加multi feature attention层
-- Launch the train_mv_dssm_v3.py TensorFlow job on PAI with 50 workers and 10 parameter servers.
-- The original command passed -DuseSparseClusterSchema=True twice; it is now given once.
pai -name tensorflow140
    -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz"
    -DentryFile="train_mv_dssm_v3.py"
    -Dcluster='{"worker":{"count":50, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}'
    -DuseSparseClusterSchema=True
    -DenableDynamicCluster=True
    -Dtables="odps://graph_embedding/tables/hs_train_data_dssm_v2_8,odps://graph_embedding/tables/hs_test_data_dssm_v2_8,odps://graph_embedding/tables/hs_tmp_267"
    -Doutputs="odps://graph_embedding/tables/hs_dssm_result_5"
    -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com"
    -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_2e_4.ckpt";
20190903081006203gvv33fck2
- 构造inference数据
-- Rebuild hs_query_title_inference_gt_4: distinct rows of hs_query_title_inference_gt_3
-- paired with the url of each hs_tmp_349 row that shares the title and passes the filters below.
insert overwrite table hs_query_title_inference_gt_4
select distinct
    a.*,
    b.url
from hs_query_title_inference_gt_3 a
inner join (
    select
        title,
        url
    from hs_tmp_349
    where image_position > 0
      and image_position < 6
      and type = 1
      and status = 1
) b
    on a.title = b.title;
-- Remove any previous hs_tmp_357 before it is recreated; "if exists" keeps a rerun from failing when the table is absent.
drop table if exists hs_tmp_357;
yes
-- One row per distinct title found in hs_query_title_inference_gt.
create table hs_tmp_357 as
select title
from hs_query_title_inference_gt
group by title;
-- Assign a numeric title_id to every title in hs_tmp_357.
-- The window is ordered by title so the ids are reproducible across reruns;
-- the original empty over() left the numbering order up to the engine.
create table hs_tmp_359 as
select
    title,
    row_number() over (order by title) as title_id
from hs_tmp_357;
-- Remove any previous hs_tmp_358 before it is recreated; "if exists" keeps a rerun from failing when the table is absent.
drop table if exists hs_tmp_358;
yes
-- Attach title_id to every hs_tmp_349 row whose title appears in hs_tmp_359
-- and which passes the image_position / type / status filters.
create table hs_tmp_358 as
select
    a.title_id,
    b.*
from hs_tmp_359 a
inner join hs_tmp_349 b
    on a.title = b.title
where b.image_position > 0
  and b.image_position < 6
  and b.type = 1
  and b.status = 1;
每个title按item_id排序后取前5条作为inference数据:
-- Number the rows of each title by ascending item_id.
-- The rank gets an explicit alias (rn) instead of relying on the engine-generated name _c1.
create table hs_tmp_360 as
select
    *,
    row_number() over (partition by title order by item_id) as rn
from hs_tmp_358;

-- Keep at most the first 5 rows per title.
create table hs_tmp_361 as
select
    title_id,
    item_id,
    title,
    url
from hs_tmp_360
where rn < 6;
-- Build graph_embedding.hs_tmp_pic_inf_1: every hs_tmp_361 row plus pic_ws, the result of passing url
-- through the UDF chain single_img_get -> Imgto1d_Yuyan_python -> yuyan_udf_resnet50_fullcate_fc.
create table graph_embedding.hs_tmp_pic_inf_1 as
select
    *,
    search_offline:yuyan_udf_resnet50_fullcate_fc(
        search_offline:Imgto1d_Yuyan_python(
            search_offline:single_img_get(url)
        )
    ) as pic_ws
from hs_tmp_361;
- 测试multi feature attention的效果
-- Launch the train_mv_dssm_v4.py TensorFlow job on PAI with 1 worker and 1 parameter server.
-- The original command passed -DuseSparseClusterSchema=True twice; it is now given once.
pai -name tensorflow140
    -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz"
    -DentryFile="train_mv_dssm_v4.py"
    -Dcluster='{"worker":{"count":1, "cpu":200, "memory":4000}, "ps":{"count":1, "cpu":200, "memory":5000}}'
    -DuseSparseClusterSchema=True
    -DenableDynamicCluster=True
    -Dtables="odps://graph_embedding/tables/hs_train_data_dssm_v2_8,odps://graph_embedding/tables/hs_test_data_dssm_v2_8,odps://graph_embedding/tables/hs_tmp_267"
    -Doutputs="odps://graph_embedding/tables/hs_dssm_result_5"
    -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com"
    -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_2e_4.ckpt";
20190903081006203gvv33fck2










网友评论