diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/.gitattributes b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..dfe0770424b2a19faf507a501ebfc23be8f54e7b
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/.gitattributes
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/MOT_to_UA_Detrac.m b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/MOT_to_UA_Detrac.m
new file mode 100644
index 0000000000000000000000000000000000000000..6535b96150f0519f2cad865cddf4273828b5cc1b
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/MOT_to_UA_Detrac.m
@@ -0,0 +1,53 @@
+function MOT_to_UA_Detrac(gt_file, seq_name, save_folder, img_size)
+% Convert MOT-format ground truth into the UA-DETRAC gtInfo .mat format.
+% gt_file: path to the MOT ground-truth txt file
+% seq_name: name under which the .mat file is saved
+% save_folder: output directory
+% img_size: frame size, e.g. [800,600]
+fileID = fopen(gt_file,'r');
+A = textscan(fileID,'%d %d %d %d %d %d %d %d %d %d','Delimiter',',');
+fclose(fileID);
+
+M = [A{1},A{2},A{3},A{4},A{5},A{6},A{7},A{8},A{9}];
+% Remap the original track IDs to consecutive integers 1..max_id.
+max_fr = max(M(:,1));
+uniq_ids = unique(M(:,2));
+for n = 1:length(uniq_ids)
+    A{2}(M(:,2)==uniq_ids(n)) = n;
+end
+M(:,2) = A{2};
+max_id = length(uniq_ids);
+% Scatter per-detection values into frame-by-ID matrices via linear indexing.
+X = zeros(max_fr,max_id);
+Y = zeros(max_fr,max_id);
+W = zeros(max_fr,max_id);
+H = zeros(max_fr,max_id);
+X(M(:,1)+(M(:,2)-1)*max_fr) = M(:,3);
+Y(M(:,1)+(M(:,2)-1)*max_fr) = M(:,4);
+W(M(:,1)+(M(:,2)-1)*max_fr) = M(:,5);
+H(M(:,1)+(M(:,2)-1)*max_fr) = M(:,6);
+gtInfo.X = X;
+gtInfo.Y = Y;
+gtInfo.W = W;
+gtInfo.H = H;
+
+% visibility: 1 minus the largest overlap ratio with any other box in the frame
+V = zeros(size(W));
+for n = 1:size(W,1)
+    idx = find(W(n,:)~=0);
+    if length(idx)<=1
+        V(n,idx) = 1;
+        continue
+    end
+    bbox = zeros(length(idx),4);
+    bbox(:,1) = X(n,idx)';
+    bbox(:,2) = Y(n,idx)';
+    bbox(:,3) = W(n,idx)';
+    bbox(:,4) = H(n,idx)';
+    [overlapRatio,~] = overlap(bbox,bbox);
+    for k = 1:length(idx)
+        overlapRatio(k,k) = 0;
+    end
+    max_overlap = max(overlapRatio,[],1);
+    V(n,idx) = 1-max_overlap;
+end
+gtInfo.V = V;
+gtInfo.img_size = img_size;
+save_path = [save_folder,'\',seq_name,'.mat'];
+save(save_path,'gtInfo');
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/create_pair.m b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/create_pair.m
new file mode 100644
index 0000000000000000000000000000000000000000..fec1b635417bb290a5805c273607dee6b3a172cb
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/create_pair.m
@@ -0,0 +1,63 @@
+function create_pair(dataset_dir, save_dir, num_pair, n_fold)
+% Generate LFW-style positive/negative pair lists from cropped images.
+% dataset_dir: cropped image dir (one sub-folder per identity)
+% save_dir: output file path for the pairs list
+% num_pair: number of pairs for each fold
+% n_fold: the number of folds
+class_list = dir(dataset_dir);
+class_list(1:2) = []; % drop the '.' and '..' entries
+classNum = length(class_list);
+instanceNum = zeros(1,classNum);
+for n = 1:classNum
+    temp_dir = [dataset_dir,'\',class_list(n).name,'\*.png'];
+    sub_list = dir(temp_dir);
+    instanceNum(n) = length(sub_list);
+end
+
+
+fileID = fopen(save_dir,'w');
+
+
+K = n_fold; % e.g. 10
+pairNum = num_pair; % e.g. 300
+for k = 1:K
+    % Positive pairs: two distinct frames of the same identity.
+    for n = 1:pairNum
+        d = 0;
+        while d<1 % resample until the two frame indices differ
+            temp_num = 0;
+            while temp_num<2 % skip identities with fewer than two images
+                rand_class = randi(classNum);
+                class_name = class_list(rand_class).name;
+                img_list = dir([dataset_dir,'\',class_name,'\*.png']);
+                temp_num = instanceNum(rand_class);
+            end
+            choose_idx = randperm(instanceNum(rand_class),2);
+            temp_name1 = img_list(choose_idx(1)).name;
+            temp_name2 = img_list(choose_idx(2)).name;
+            % The 4 digits before the extension encode the frame index.
+            dot_loc1 = find(temp_name1=='.');
+            dot_loc2 = find(temp_name2=='.');
+            idx1 = str2double(temp_name1(dot_loc1-4:dot_loc1-1));
+            idx2 = str2double(temp_name2(dot_loc2-4:dot_loc2-1));
+            d = abs(idx1-idx2);
+        end
+        fprintf(fileID,'%s %d %d\n',class_name,idx1,idx2);
+    end
+
+    % Negative pairs: one frame from each of two different identities.
+    for n = 1:pairNum
+        rand_class = randperm(classNum,2);
+        class_name1 = class_list(rand_class(1)).name;
+        class_name2 = class_list(rand_class(2)).name;
+        choose_idx1 = randperm(instanceNum(rand_class(1)),1);
+        choose_idx2 = randperm(instanceNum(rand_class(2)),1);
+        img_list1 = dir([dataset_dir,'\',class_name1,'\*.png']);
+        img_list2 = dir([dataset_dir,'\',class_name2,'\*.png']);
+        temp_name1 = img_list1(choose_idx1).name;
+        temp_name2 = img_list2(choose_idx2).name;
+        dot_loc1 = find(temp_name1=='.');
+        dot_loc2 = find(temp_name2=='.');
+        idx1 = str2double(temp_name1(dot_loc1-4:dot_loc1-1));
+        idx2 = str2double(temp_name2(dot_loc2-4:dot_loc2-1));
+        fprintf(fileID,'%s %d %s %d\n',class_name1,idx1,class_name2,idx2);
+    end
+end
+
+fclose(fileID);
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/crop_UA_Detrac.m b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/crop_UA_Detrac.m
new file mode 100644
index 0000000000000000000000000000000000000000..f723db526c6e931c722486ed26f165e28f12fe7a
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/General/crop_UA_Detrac.m
@@ -0,0 +1,51 @@
+function crop_UA_Detrac(gt_path, seq_name, img_folder, img_format, save_folder)
+
+% Crop per-identity image patches from a sequence using the gtInfo boxes.
+% gt_path: gtInfo.mat containing X,Y,W,H
+% seq_name: name of the sequence
+% img_folder: input image dir
+% img_format: input image format, e.g., png, jpg
+% save_folder: cropped image dir
+
+load(gt_path)
+margin_scale = 0.15; % pad each box by 15% of its shorter side
+resize_size = 182;   % output crops are square, resize_size x resize_size
+X = gtInfo.X;
+Y = gtInfo.Y;
+W = gtInfo.W;
+H = gtInfo.H;
+img_list = dir([img_folder,'\*.',img_format]);
+for m = 1:length(img_list)
+    img_name = img_list(m).name;
+    img_path = [img_folder,'\',img_name];
+    img = imread(img_path);
+    img_size = size(img);
+    num_id = size(H,2);
+    if m>size(gtInfo.H,1)
+        continue
+    end
+    for k = 1:num_id
+        if gtInfo.H(m,k)<1
+            continue
+        end
+        xmin = round(X(m,k));
+        ymin = round(Y(m,k));
+        xmax = round(X(m,k)+W(m,k)-1);
+        ymax = round(Y(m,k)+H(m,k)-1);
+        min_side = min(xmax-xmin,ymax-ymin);
+        margin = min_side*margin_scale;
+        xmin = round(max(xmin-margin,1));
+        ymin = round(max(ymin-margin,1));
+        xmax = round(min(xmax+margin,img_size(2)));
+        ymax = round(min(ymax+margin,img_size(1)));
+        crop_img = img(ymin:ymax,xmin:xmax,:);
+        crop_img = imresize(crop_img, [resize_size,resize_size]);
+        % fileName: helper expected to zero-pad the index to 4 digits (not defined in this file)
+        class_name = [seq_name,'_',fileName(k,4)];
+        class_folder = [save_folder,'\',class_name];
+        if exist(class_folder,'dir')<=0
+            mkdir(class_folder)
+        end
+        id_name = [class_name,'_',fileName(m,4)];
+        save_path = [class_folder,'\',id_name,'.png'];
+        imwrite(crop_img, save_path);
+    end
+end
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/LICENSE b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..9e419e042146a2ce2e354202d4f7d8e4a3d66f31
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/LICENSE
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+ + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. 
Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. 
You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. 
+ + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. 
+ + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. 
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/README.md b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a4151528b5fb844a5983143cf4cce2f3d7a6d4f2
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/README.md
@@ -0,0 +1,187 @@
+
+
+# Basic Information
+
+**Publisher: Huawei**
+
+**Version: 1.1**
+
+**Modified: 2022.3.29**
+
+**Framework: TensorFlow 1.15.0**
+
+**Model Format: ckpt**
+
+**Precision: Mixed**
+
+**Processor: Ascend 910**
+
+**Categories: Research**
+
+**Description: training code for the TNT network, based on the TensorFlow framework**
+
+# Overview
+
+TNT (TrackletNet Tracker) is a tracker with strong performance.
+
+Key techniques:
+
+- Tracklet-based graph model: an undirected graph whose vertices are tracklets and whose edge weights are (a decreasing function of) the similarity between two tracklets; the tracklet-to-trajectory step can then be carried out by a vertex clustering algorithm.
+- Multi-scale TrackletNet: takes two tracklets as input and outputs their similarity; its main feature is the use of temporal 1-D filters to fully exploit the temporal information in the tracklets.
+- EG-IOU: when associating detections between frames, epipolar geometry (EG) is used to predict the best position of each detection box in the next frame, which improves the IOU-based association.
+
+Key strengths:
+
+- The graph model makes full use of temporal information and lowers computational complexity.
+- TrackletNet is a unified system that combines appearance and temporal information in a principled way. Note that traditional temporal cues are usually bbox position, size, and motion; TrackletNet instead mines the temporal information carried by appearance features (i.e., the temporal continuity of appearance) through temporal convolution and pooling.
+- The EG technique effectively counters wrong associations caused by camera motion.
+
+Reference paper:
+
+[https://arxiv.org/abs/1811.07258](https://gitee.com/link?target=https%3A%2F%2Farxiv.org%2Fabs%2F1811.07258)
+
+Reference implementation:
+
+[https://github.com/GaoangW/TNT](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2FGaoangW%2FTNT)
+
+Third-party blog post:
+
+[https://blog.csdn.net/qq_42191914/article/details/103619045](https://gitee.com/link?target=https%3A%2F%2Fblog.csdn.net%2Fqq_42191914%2Farticle%2Fdetails%2F103619045)
+
+Implementation adapted to the Ascend AI processor:
+
+https://gitee.com/ascend/modelzoo/tree/master/contrib/TensorFlow/Research/cv/TNT_ID1233_for_TensorFlow
+
+To obtain the code at a given commit_id via Git:
+
+```
+git clone {repository_url}    # clone the repository
+cd {repository_name}         # enter the model's code directory
+git checkout {branch}        # switch to the corresponding branch
+git reset --hard {commit_id} # reset the code to the corresponding commit_id
+cd {code_path}               # switch to the model code path; skip this if the repo contains only this model
+```
+
+# Preparing the Training Environment
+
+1. For hardware preparation, see the "[Driver and Firmware Installation and Upgrade Guide]( https://support.huawei.com/enterprise/zh/category/ai-computing-platform-pid-1557196528909)" of each hardware product. Firmware and drivers matching the CANN version must be installed on the hardware.
+
+2. Docker must be installed on the host, and the image is obtained by logging in to the [Ascend Hub](https://ascendhub.huawei.com/#/detail?name=ascend-tensorflow-arm).
+
+   The images supported by this model are listed in [Table 1](#zh-cn_topic_0000001074498056_table1519011227314).
+
+   **Table 1** Image list
+
+   | Image name | Image version | Compatible CANN version |
+   | ---------- | ------------- | ----------------------- |
+   | ascend-tensorflow-arm | 20.2.0 | 20.2 |
+
+## Quick Start
+
+- Dataset preparation
+
+1. Training uses the MOT17Det dataset; users must obtain the dataset themselves.
+
+2. TNT/General/MOT_to_UA_Detrac.m
+   MOT_to_UA_Detrac(gt_file, seq_name, save_folder, img_size)
+
+   **Typical parameters provided by the author**: MOT_to_UA_Detrac('.txt', '1', 'save_dir', [800,600]);
+   gt_file: path to the MOT ground-truth txt
+   seq_name: save name
+   save_folder: save dir
+
+3. TNT/General/crop_UA_Detrac.m
+   crop_UA_Detrac(gt_path, seq_name, img_folder, img_format, save_folder)
+
+   **Typical parameters provided by the author**: crop_UA_Detrac('gt_path.mat', '1', 'the folder contains the sequence images', 'jpg', 'save_folder');
+   gt_path: gtInfo.mat containing X,Y,W,H
+   seq_name: name of the sequence
+   img_folder: input image dir
+   img_format: input image format, e.g., png, jpg
+   save_folder: cropped image dir
+
+4. TNT/General/create_pair.m
+   create_pair(dataset_dir, save_dir, num_pair, n_fold)
+
+   **Typical parameters provided by the author**: create_pair('dataset_dir', 'save_dir', 300, 10)
+   dataset_dir: cropped image dir
+   save_dir: output dir
+   num_pair: number of pairs for each fold
+   n_fold: the number of folds
+   After this step, open every pairs.txt and add one empty line at the top of the file.
+
+**Notes**:
+* MATLAB R2014b or later is required; otherwise the function bboxOverlapRatio cannot be found (it was introduced in R2014b, and the earlier source implementation is no longer available).
+
+* create_pair.m may raise errors on K and pairNum; replacing them with literal numbers resolves this.
+
+**File layout**:
+ Put all .mat files into one folder placed directly under the original MOT17Det data directory (as a landmark, this directory originally contains the two folders train and test). The cropped images and pairs.txt are stored in the LFW dataset layout, i.e., in paths such as MOT17Det/train/MOT17-02/img1 and MOT17Det/train/MOT17-02/pairs.txt.
+
+## Model Pre-training
+
+Configure the input paths (data_dir, lfw_dir, lfw_pairs, pretrained_model, etc.) and output paths (logs_base_dir, models_base_dir, etc.) according to your actual directories, then use TNT/src/my_train_tripletloss.py to train the triplet appearance model from the cropped data and the FaceNet network. (Note: the process_data directory contains several data groups, e.g., MOT17-02, MOT17-04; after training on one group finishes, update the data path before starting the next group.)
+
+Typical parameters:
+
+```
+python3.7 ${code_dir}/train_tripletloss.py \
+        --logs_base_dir ${output_path}/tripletloss_logs/ \
+        --models_base_dir ${output_path}/models/MOT17-02/ \
+        --data_dir ${data_path}/processed_data/MOT17-02/img/ \
+        --lfw_dir ${data_path}/processed_data/MOT17-02/img \
+        --lfw_pairs ${data_path}/processed_data/MOT17-02/pairs.txt \
+        --pretrained_model ${data_path}/models/pretrained/model-20180402-114759.ckpt-275 \
+        --image_size 160 \
+        --model_def models.inception_resnet_v1 \
+        --optimizer RMSPROP \
+        --learning_rate 0.01 \
+        --weight_decay 1e-4 \
+        --max_nrof_epochs 500 \
+        --embedding_size 512 \
+        --batch_size 30 \
+        --people_per_batch 15 \
+        --images_per_person 10 \
+        --epoch_size 100
+```
+
+(To speed up data transfer, it is common to create a model output directory under each data group directory (e.g., dataset/processed_data/MOT17-02/) and set that group's path as the data path in OBS.)
+
+Sample dataset address:
+
+URL:
+https://e-share.obs-website.cn-north-1.myhuaweicloud.com?token=kgEgUsFhjaxRtkatN8fRvHyfzFIiPgPMLfABSMULPiLhGFN4hT3ATq1R6KpKf4mVhehzLTPYxD8HJ/GqQ0Z/ATpbVj+cLIc6+J5O98fzKldbzObw4KcTp+z37WFHwqA6hlZ1cBe0NvBwxi4bulpqNoRWxxAzBbmqM4tgC/I/lK7s1u5R0ZcdUYMc8ZB3kN2BJap33XDuZ9qmTJQSOthIoKXCWOIQZi+Aly6fvZoSbYWUcmr5QdXDFOwmXgWIQuEIbOA39Z0qWaJrnvwnwpIBMV39RDma2Kz18hD760ZVLOo4naDtZLXwVQ7Wbgw46saJDDUAoXNkPHOlAA2OlGpoirjpj1P+DLV6GNTa4OJs2itfDfYhY3EMl+NZZC2+1+M0Fer8FK7agCQhZ2rcGgatPlXTEKjd1GTVtOOoIPfPC3A2YzbZBxye+JNwWJO8xbf8E/t7FJcNBp2UWZIefwB0uzSVHTjYxuYF1xghqovSBDwQ76o7ckNTRuI6+9YeNMlaZpj5DWF5pTrM12l43LMSzdeYUlugxLCofFrWH24IpEQ=
+
+Access code:
+123456
+
+*Valid until: 2023/03/24 21:20:13 GMT+08:00
+
+# Training the 2-D Tracking Model
+
+Set the directory paths before all the function definitions in TNT/train_cnn_trajectory_2d.py (note that triplet_model is the pre-trained model from the previous step). Adjust the sampling probabilities (sample_prob) according to the data density; the number of elements in sample_prob is the number of input .mat files. Set the learning rate (lr) to 1e-3 at the start; every 2000 steps, reduce lr by a factor of 10 until it reaches 1e-5 (a minimal sketch of this schedule follows the results table below). The output model is stored in save_dir. Run TNT/my_train_cnn_trajectory_2d.py. (These steps are wrapped in test/train_full_1p.sh.)
+
+# Accuracy Results
+
+| GPU (199 epochs) | NPU (199 epochs) |
+| ---------------- | ---------------- |
+| 0.790625 | 0.8 |
+
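The learning-rate schedule described above is piecewise constant. A minimal sketch in plain Python (the function name and defaults are illustrative, not part of the repo's API):

```
def lr_at_step(step, base_lr=1e-3, decay_every=2000, floor=1e-5):
    """Divide the learning rate by 10 every `decay_every` steps, never below `floor`."""
    return max(base_lr / (10 ** (step // decay_every)), floor)

# lr_at_step(0) -> 1e-3, lr_at_step(2000) -> 1e-4, lr_at_step(4000) -> 1e-5 (and stays there)
```
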
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/TC_tracker.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/TC_tracker.py
new file mode 100644
index 0000000000000000000000000000000000000000..4dc2bd0dda74f4cc3bb6a132f394e41b1027a987
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/TC_tracker.py
@@ -0,0 +1,72 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright ©2019 Gaoang Wang. All rights reserved. Permission is
+# hereby granted for academic use. No other use, copying, distribution, or modification
+# is permitted without prior written consent. Copyrights for
+# third-party components of this work must be honored. Instructors
+# interested in reusing these course materials should contact the
+# author.
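+#
+# Note: this file is the tracker entry point; running it executes
+# tracklet_utils_3c.TC_tracker() via the assignment at the bottom of the file.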
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+import facenet
+import lfw
+import os
+import sys
+import cv2
+import pickle
+
+from tensorflow.python.ops import data_flow_ops
+from sklearn import metrics
+from scipy.optimize import brentq
+from scipy import interpolate
+from scipy.interpolate import interp1d
+from scipy.io import loadmat
+from scipy import misc
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from sklearn import svm
+from sklearn.externals import joblib
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import make_classification
+from PIL import Image
+import seq_nn_3d_v2
+import tracklet_utils_3c
+import track_lib
+
+
+# Entry point: run the TC tracker.
+track_struct = tracklet_utils_3c.TC_tracker()
+
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/command.txt b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/command.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bfe8a4c0c97a2b0595759b86761cc1128a1a4f48
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/command.txt
@@ -0,0 +1,5 @@
+python src/validate_on_lfw.py D:/Code/update_facenet/facenet-master/facenet-master/datasets/lfw/align_img D:/Code/update_facenet/facenet-master/20180402-114759/20180402-114759 --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization
+
+python src/validate_on_lfw.py D:/Code/update_facenet/facenet-master/facenet-master/datasets/lfw/align_img C:\Users\gaoan\models\facenet\20180626-134545 --distance_metric 0 --use_flipped_images --subtract_mean --use_fixed_image_standardization
+
+python src/train_tripletloss.py --logs_base_dir ~/logs/facenet/ --models_base_dir ~/models/facenet/ --data_dir D:/Code/update_facenet/facenet-master/facenet-master/datasets/lfw/align_img --image_size 160 --model_def models.inception_resnet_v1 --lfw_dir D:/Code/update_facenet/facenet-master/facenet-master/datasets/lfw/align_img --optimizer RMSPROP --learning_rate 0.01 --weight_decay 1e-4 --max_nrof_epochs 500 --pretrained_model D:/Code/update_facenet/facenet-master/20180408-102900/20180408-102900/model-20180408-102900.ckpt-90 --embedding_size 512 --batch_size 30 --people_per_batch 15 --images_per_person 10 --epoch_size 100 --learning_rate 0.0001
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/facenet.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/facenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ac7a9732603442a98ca35ebaa584896dc642d81
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/facenet.py
@@ -0,0 +1,626 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Functions for building the face recognition network. +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# pylint: disable=missing-docstring +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from npu_bridge.npu_init import * + +import os +from subprocess import Popen, PIPE +import tensorflow as tf +import numpy as np +from scipy import misc +from sklearn.model_selection import KFold +from scipy import interpolate +from tensorflow.python.training import training +import random +import re +from tensorflow.python.platform import gfile +import math +from six import iteritems + +def triplet_loss(anchor, positive, negative, alpha): + """Calculate the triplet loss according to the FaceNet paper + + Args: + anchor: the embeddings for the anchor images. + positive: the embeddings for the positive images. + negative: the embeddings for the negative images. + + Returns: + the triplet loss according to the FaceNet paper as a float tensor. 
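+
+    Note: alpha is the margin between the positive and negative distances.
+    The code below computes mean(max(||f(a)-f(p)||^2 - ||f(a)-f(n)||^2 + alpha, 0))
+    over the batch, where f(.) denotes the embedding.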
+ """ + with tf.variable_scope('triplet_loss'): + pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) + neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) + + basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) + loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) + + return loss + +def center_loss(features, label, alfa, nrof_classes): + """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" + (http://ydwen.github.io/papers/WenECCV16.pdf) + """ + nrof_features = features.get_shape()[1] + centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, + initializer=tf.constant_initializer(0), trainable=False) + label = tf.reshape(label, [-1]) + centers_batch = tf.gather(centers, label) + diff = (1 - alfa) * (centers_batch - features) + centers = tf.scatter_sub(centers, label, diff) + with tf.control_dependencies([centers]): + loss = tf.reduce_mean(tf.square(features - centers_batch)) + return loss, centers + +def get_image_paths_and_labels(dataset): + image_paths_flat = [] + labels_flat = [] + for i in range(len(dataset)): + image_paths_flat += dataset[i].image_paths + labels_flat += [i] * len(dataset[i].image_paths) + return image_paths_flat, labels_flat + +def shuffle_examples(image_paths, labels): + shuffle_list = list(zip(image_paths, labels)) + random.shuffle(shuffle_list) + image_paths_shuff, labels_shuff = zip(*shuffle_list) + return image_paths_shuff, labels_shuff + +def random_rotate_image(image): + angle = np.random.uniform(low=-10.0, high=10.0) + return misc.imrotate(image, angle, 'bicubic') + +# 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip +RANDOM_ROTATE = 1 +RANDOM_CROP = 2 +RANDOM_FLIP = 4 +FIXED_STANDARDIZATION = 8 +FLIP = 16 + +def parser_data(filenames, label, control, image_size): + images = [] + for filename in tf.unstack(filenames): + file_contents = tf.read_file(filename) + image = tf.image.decode_image(file_contents, 3, dtype = tf.float32) + image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), + lambda: tf.py_func(random_rotate_image, [image], tf.float32), + lambda: tf.identity(image)) + image = tf.cond(get_control_flag(control[0], RANDOM_CROP), + lambda: tf.random_crop(image, image_size + (3,)), + lambda: tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1])) + image = tf.cond(get_control_flag(control[0], RANDOM_FLIP), + lambda: tf.image.random_flip_left_right(image), + lambda: tf.identity(image)) + image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION), + lambda: (tf.cast(image, tf.float32) - 127.5) / 128.0, + lambda: tf.image.per_image_standardization(image)) + image = tf.cond(get_control_flag(control[0], FLIP), + lambda: tf.image.flip_left_right(image), + lambda: tf.identity(image)) + # pylint: disable=no-member + image.set_shape(image_size + (3,)) + images.append(image) + return [images, label] + +def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder): + images_and_labels_list = [] + for _ in range(nrof_preprocess_threads): + filenames, label, control = input_queue.dequeue() + images = [] + for filename in tf.unstack(filenames): + file_contents = tf.read_file(filename) + image = tf.image.decode_image(file_contents, 3) + image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), + lambda:tf.py_func(random_rotate_image, [image], tf.uint8), + lambda:tf.identity(image)) + image = tf.cond(get_control_flag(control[0], 
RANDOM_CROP),
+                            lambda:tf.random_crop(image, image_size + (3,)),
+                            lambda:tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1]))
+            image = tf.cond(get_control_flag(control[0], RANDOM_FLIP),
+                            lambda:tf.image.random_flip_left_right(image),
+                            lambda:tf.identity(image))
+            image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION),
+                            lambda:(tf.cast(image, tf.float32) - 127.5)/128.0,
+                            lambda:tf.image.per_image_standardization(image))
+            image = tf.cond(get_control_flag(control[0], FLIP),
+                            lambda:tf.image.flip_left_right(image),
+                            lambda:tf.identity(image))
+            #pylint: disable=no-member
+            image.set_shape(image_size + (3,))
+            images.append(image)
+        images_and_labels_list.append([images, label])
+
+    image_batch, label_batch = tf.train.batch_join(
+        images_and_labels_list, batch_size=batch_size_placeholder,
+        shapes=[image_size + (3,), ()], enqueue_many=True,
+        capacity=4 * nrof_preprocess_threads * 100,
+        allow_smaller_final_batch=True)
+
+    return image_batch, label_batch
+
+def get_control_flag(control, field):
+    # `control` is a bitmask built from the flag constants above; this checks
+    # whether the bit for `field` (a power of two) is set.
+    return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1)
+
+def _add_loss_summaries(total_loss):
+    """Add summaries for losses.
+
+    Generates moving average for all losses and associated summaries for
+    visualizing the performance of the network.
+
+    Args:
+      total_loss: Total loss from loss().
+    Returns:
+      loss_averages_op: op for generating moving averages of losses.
+    """
+    # Compute the moving average of all individual losses and the total loss.
+    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+    losses = tf.get_collection('losses')
+    loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+    # Attach a scalar summary to all individual losses and the total loss; do the
+    # same for the averaged version of the losses.
+    for l in losses + [total_loss]:
+        # Name each loss as '(raw)' and name the moving average version of the loss
+        # as the original loss name.
+        tf.summary.scalar(l.op.name +' (raw)', l)
+        tf.summary.scalar(l.op.name, loss_averages.average(l))
+
+    return loss_averages_op
+
+def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True):
+    # Generate moving averages of all losses and associated summaries.
+    loss_averages_op = _add_loss_summaries(total_loss)
+
+    # Compute gradients.
+    with tf.control_dependencies([loss_averages_op]):
+        if optimizer=='ADAGRAD':
+            opt = tf.train.AdagradOptimizer(learning_rate)
+        elif optimizer=='ADADELTA':
+            opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
+        elif optimizer=='ADAM':
+            opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
+        elif optimizer=='RMSPROP':
+            opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
+        elif optimizer=='MOM':
+            opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
+        else:
+            raise ValueError('Invalid optimization algorithm')
+
+        grads = opt.compute_gradients(total_loss, update_gradient_vars)
+
+    # Apply gradients.
+    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+    # Add histograms for trainable variables.
+    if log_histograms:
+        for var in tf.trainable_variables():
+            tf.summary.histogram(var.op.name, var)
+
+    # Add histograms for gradients.
+    if log_histograms:
+        for grad, var in grads:
+            if grad is not None:
+                tf.summary.histogram(var.op.name + '/gradients', grad)
+
+    # Track the moving averages of all trainable variables.
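+    # variable_averages.apply() below creates a shadow copy of every trainable
+    # variable; the returned op updates those copies each step, and the shadow
+    # values are saved in checkpoints alongside the raw weights.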
+ variable_averages = tf.train.ExponentialMovingAverage( + moving_average_decay, global_step) + variables_averages_op = variable_averages.apply(tf.trainable_variables()) + + with tf.control_dependencies([apply_gradient_op, variables_averages_op]): + train_op = tf.no_op(name='train') + + return train_op + +def prewhiten(x): + mean = np.mean(x) + std = np.std(x) + std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) + y = np.multiply(np.subtract(x, mean), 1/std_adj) + return y + +def crop(image, random_crop, image_size): + if image.shape[1]>image_size: + sz1 = int(image.shape[1]//2) + sz2 = int(image_size//2) + if random_crop: + diff = sz1-sz2 + (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) + else: + (h, v) = (0,0) + image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] + return image + +def flip(image, random_flip): + if random_flip and np.random.choice([True, False]): + image = np.fliplr(image) + return image + +def to_rgb(img): + w, h = img.shape + ret = np.empty((w, h, 3), dtype=np.uint8) + ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img + return ret + +def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): + nrof_samples = len(image_paths) + images = np.zeros((nrof_samples, image_size, image_size, 3)) + for i in range(nrof_samples): + img = misc.imread(image_paths[i]) + if img.ndim == 2: + img = to_rgb(img) + if do_prewhiten: + img = prewhiten(img) + img = crop(img, do_random_crop, image_size) + img = flip(img, do_random_flip) + images[i,:,:,:] = img + return images + +def get_label_batch(label_data, batch_size, batch_index): + nrof_examples = np.size(label_data, 0) + j = batch_index*batch_size % nrof_examples + if j+batch_size<=nrof_examples: + batch = label_data[j:j+batch_size] + else: + x1 = label_data[j:nrof_examples] + x2 = label_data[0:nrof_examples-j] + batch = np.vstack([x1,x2]) + batch_int = batch.astype(np.int64) + return batch_int + +def get_batch(image_data, batch_size, batch_index): + nrof_examples = np.size(image_data, 0) + j = batch_index*batch_size % nrof_examples + if j+batch_size<=nrof_examples: + batch = image_data[j:j+batch_size,:,:,:] + else: + x1 = image_data[j:nrof_examples,:,:,:] + x2 = image_data[0:nrof_examples-j,:,:,:] + batch = np.vstack([x1,x2]) + batch_float = batch.astype(np.float32) + return batch_float + +def get_triplet_batch(triplets, batch_index, batch_size): + ax, px, nx = triplets + a = get_batch(ax, int(batch_size/3), batch_index) + p = get_batch(px, int(batch_size/3), batch_index) + n = get_batch(nx, int(batch_size/3), batch_index) + batch = np.vstack([a, p, n]) + return batch + +def get_learning_rate_from_file(filename, epoch): + with open(filename, 'r') as f: + for line in f.readlines(): + line = line.split('#', 1)[0] + if line: + par = line.strip().split(':') + e = int(par[0]) + if par[1]=='-': + lr = -1 + else: + lr = float(par[1]) + if e <= epoch: + learning_rate = lr + else: + return learning_rate + +class ImageClass(): + "Stores the paths to images for a given class" + def __init__(self, name, image_paths): + self.name = name + self.image_paths = image_paths + + def __str__(self): + return self.name + ', ' + str(len(self.image_paths)) + ' images' + + def __len__(self): + return len(self.image_paths) + +def get_dataset(path, has_class_directories=True): + dataset = [] + path_exp = os.path.expanduser(path) + classes = [path for path in os.listdir(path_exp) \ + if os.path.isdir(os.path.join(path_exp, path))] + classes.sort() + nrof_classes = len(classes) + for i in 
range(nrof_classes):
+        class_name = classes[i]
+        facedir = os.path.join(path_exp, class_name)
+        image_paths = get_image_paths(facedir)
+        dataset.append(ImageClass(class_name, image_paths))
+
+    return dataset
+
+def get_image_paths(facedir):
+    image_paths = []
+    if os.path.isdir(facedir):
+        images = os.listdir(facedir)
+        image_paths = [os.path.join(facedir,img) for img in images]
+    return image_paths
+
+def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode):
+    if mode=='SPLIT_CLASSES':
+        nrof_classes = len(dataset)
+        class_indices = np.arange(nrof_classes)
+        np.random.shuffle(class_indices)
+        split = int(round(nrof_classes*(1-split_ratio)))
+        train_set = [dataset[i] for i in class_indices[0:split]]
+        test_set = [dataset[i] for i in class_indices[split:]]
+    elif mode=='SPLIT_IMAGES':
+        train_set = []
+        test_set = []
+        for cls in dataset:
+            paths = cls.image_paths
+            np.random.shuffle(paths)
+            nrof_images_in_class = len(paths)
+            split = int(math.floor(nrof_images_in_class*(1-split_ratio)))
+            if split==nrof_images_in_class:
+                split = nrof_images_in_class-1
+            if split>=min_nrof_images_per_class and nrof_images_in_class-split>=1:
+                train_set.append(ImageClass(cls.name, paths[:split]))
+                test_set.append(ImageClass(cls.name, paths[split:]))
+    else:
+        raise ValueError('Invalid train/test split mode "%s"' % mode)
+    return train_set, test_set
+
+def load_model(model, input_map=None):
+    # Check if the model is a model directory (containing a metagraph and a checkpoint file)
+    # or if it is a protobuf file with a frozen graph
+    model_exp = os.path.expanduser(model)
+    if (os.path.isfile(model_exp)):
+        print('Model filename: %s' % model_exp)
+        with gfile.FastGFile(model_exp,'rb') as f:
+            graph_def = tf.GraphDef()
+            graph_def.ParseFromString(f.read())
+            tf.import_graph_def(graph_def, input_map=input_map, name='')
+    else:
+        print('Model directory: %s' % model_exp)
+        meta_file, ckpt_file = get_model_filenames(model_exp)
+
+        print('Metagraph file: %s' % meta_file)
+        print('Checkpoint file: %s' % ckpt_file)
+
+        saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map)
+        saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
+
+def get_model_filenames(model_dir):
+    files = os.listdir(model_dir)
+    meta_files = [s for s in files if s.endswith('.meta')]
+    if len(meta_files)==0:
+        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
+    elif len(meta_files)>1:
+        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
+    meta_file = meta_files[0]
+    ckpt = tf.train.get_checkpoint_state(model_dir)
+    if ckpt and ckpt.model_checkpoint_path:
+        ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
+        return meta_file, ckpt_file
+
+    meta_files = [s for s in files if '.ckpt' in s]
+    max_step = -1
+    for f in files:
+        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
+        if step_str is not None and len(step_str.groups())>=2:
+            step = int(step_str.groups()[1])
+            if step > max_step:
+                max_step = step
+                ckpt_file = step_str.groups()[0]
+    return meta_file, ckpt_file
+
+def distance(embeddings1, embeddings2, distance_metric=0):
+    if distance_metric==0:
+        # Euclidean distance
+        diff = np.subtract(embeddings1, embeddings2)
+        dist = np.sum(np.square(diff),1)
+    elif distance_metric==1:
+        # Distance based on cosine similarity
+        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1)
+        norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1)
+        similarity = dot / norm
+        dist = np.arccos(similarity) / math.pi
+    else:
+        raise ValueError('Undefined distance metric %d' % distance_metric)
+
+    return dist
+
+def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
+    assert(embeddings1.shape[0] == embeddings2.shape[0])
+    assert(embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+
+    tprs = np.zeros((nrof_folds,nrof_thresholds))
+    fprs = np.zeros((nrof_folds,nrof_thresholds))
+    accuracy = np.zeros((nrof_folds))
+
+    indices = np.arange(nrof_pairs)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+        if subtract_mean:
+            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
+        else:
+            mean = 0.0
+        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
+
+        # Find the best threshold for the fold
+        acc_train = np.zeros((nrof_thresholds))
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
+        best_threshold_index = np.argmax(acc_train)
+        for threshold_idx, threshold in enumerate(thresholds):
+            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
+        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
+
+    tpr = np.mean(tprs,0)
+    fpr = np.mean(fprs,0)
+    return tpr, fpr, accuracy
+
+def calculate_accuracy(threshold, dist, actual_issame):
+    predict_issame = np.less(dist, threshold)
+    tp = np.sum(np.logical_and(predict_issame, actual_issame))
+    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
+    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
+
+    tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn)
+    fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn)
+    acc = float(tp+tn)/dist.size
+    return tpr, fpr, acc
+
+
+
+def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0,
+                  subtract_mean=False):
+    assert(embeddings1.shape[0] == embeddings2.shape[0])
+    assert(embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+
+    val = np.zeros(nrof_folds)
+    far = np.zeros(nrof_folds)
+
+    indices = np.arange(nrof_pairs)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+        if subtract_mean:
+            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
+        else:
+            mean = 0.0
+        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
+
+        # Find the threshold that gives FAR = far_target
+        far_train = np.zeros(nrof_thresholds)
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
+        if np.max(far_train)>=far_target:
+            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
+            threshold = f(far_target)
+        else:
+            threshold = 0.0
+
+        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
+
+    val_mean = np.mean(val)
+    far_mean = np.mean(far)
+    val_std = np.std(val)
+    return val_mean, val_std, far_mean
+
+
+def calculate_val_far(threshold, dist, actual_issame):
+    predict_issame = np.less(dist, threshold)
+    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
+    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    n_same = np.sum(actual_issame)
+    n_diff = np.sum(np.logical_not(actual_issame))
+    val = float(true_accept) / float(n_same)
+    far = float(false_accept) / float(n_diff)
+    return val, far
+
+def store_revision_info(src_path, output_dir, arg_string):
+    try:
+        # Get git hash
+        cmd = ['git', 'rev-parse', 'HEAD']
+        gitproc = Popen(cmd, stdout = PIPE, cwd=src_path)
+        (stdout, _) = gitproc.communicate()
+        # communicate() returns bytes under Python 3, so decode before writing
+        git_hash = stdout.strip().decode('utf-8')
+    except OSError as e:
+        git_hash = ' '.join(cmd) + ': ' + e.strerror
+
+    try:
+        # Get local changes
+        cmd = ['git', 'diff', 'HEAD']
+        gitproc = Popen(cmd, stdout = PIPE, cwd=src_path)
+        (stdout, _) = gitproc.communicate()
+        git_diff = stdout.strip().decode('utf-8')
+    except OSError as e:
+        git_diff = ' '.join(cmd) + ': ' + e.strerror
+
+    # Store a text file in the log directory
+    rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
+    with open(rev_info_filename, "w") as text_file:
+        text_file.write('arguments: %s\n--------------------\n' % arg_string)
+        text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__) # @UndefinedVariable
+        text_file.write('git hash: %s\n--------------------\n' % git_hash)
+        text_file.write('%s' % git_diff)
+
+def list_variables(filename):
+    reader = training.NewCheckpointReader(filename)
+    variable_map = reader.get_variable_to_shape_map()
+    names = sorted(variable_map.keys())
+    return names
+
+def put_images_on_grid(images, shape=(16,8)):
+    nrof_images = images.shape[0]
+    img_size = images.shape[1]
+    bw = 3
+    img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32)
+    for i in range(shape[1]):
+        x_start = i*(img_size+bw)+bw
+        for j in range(shape[0]):
+            img_index = i*shape[0]+j
+            if img_index>=nrof_images:
+                break
+            y_start = j*(img_size+bw)+bw
+            img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :]
+        if img_index>=nrof_images:
+            break
+    return img
+
+def write_arguments_to_file(args, filename):
+    with open(filename, 'w') as f:
+        for key, value in iteritems(vars(args)):
+            f.write('%s: %s\n' % (key, str(value)))
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelarts_entry_acc.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelarts_entry_acc.py
new file mode 100644
index 0000000000000000000000000000000000000000..a66c5ee008c21d3226350037efe40aab58e96edb
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelarts_entry_acc.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
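+# This ModelArts entry point glues the platform's input/output mounts to the
+# repo's shell launcher: it parses --data_url/--train_url, switches into the
+# code directory, normalizes the test/*.sh line endings, runs the full
+# training script, and backs the results up to the output mount.
+#
+# Example local invocation (paths are illustrative, not from this repo):
+#   python modelarts_entry_acc.py --data_url=/data/tnt --train_url=/out/tnt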
+
+import os
+import argparse
+import sys
+
+# Parse the input argument data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# Convert the shell scripts to Unix (LF) line endings so they run on Linux
+os.system("dos2unix ./test/*")
+
+# Run train_full_1p.sh or train_performance_1p.sh; the user specifies which one.
+# Difference between full and performance: the performance variant runs only a
+# few steps (within 15 minutes) and mainly measures throughput (FPS).
+os.system("bash ./test/train_full_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# Back up everything in the current working directory to the OBS output directory
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelarts_entry_perf.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelarts_entry_perf.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4b6e5535c00e71c69fdb9c58f2a90e8fca1cdf5
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelarts_entry_perf.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
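+# Same flow as modelarts_entry_acc.py, but this entry point launches
+# test/train_performance_1p.sh, which runs only a few steps and reports
+# throughput (FPS) rather than final accuracy.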
+
+import os
+import argparse
+import sys
+
+# Parse the input argument data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# Convert the shell scripts to Unix (LF) line endings so they run on Linux
+os.system("dos2unix ./test/*")
+
+# Run train_full_1p.sh or train_performance_1p.sh; the user specifies which one.
+# Difference between full and performance: the performance variant runs only a
+# few steps (within 15 minutes) and mainly measures throughput (FPS).
+os.system("bash ./test/train_performance_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# Back up everything in the current working directory to the OBS output directory
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelzoo_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..956f68131ac28c32ac5e31f7ab730e6359907211
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/modelzoo_level.txt
@@ -0,0 +1,6 @@
+GPUStatus:OK
+NPUMigrationStatus:OK
+FuncStatus:OK
+PrecisionStatus:NOK
+AutoTune:NOK
+PerfStatus:NOK
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/seq_nn_3d.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/seq_nn_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..4505b927a50144ee828b2a9f1f6dbc4ea0517fb9
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/seq_nn_3d.py
@@ -0,0 +1,578 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# /*
+# * Copyright ©2019 Gaoang Wang. All rights reserved.
Permission is +# * hereby granted for academic use. No other use, copying, distribution, or modification +# * is permitted without prior written consent. Copyrights for +# * third-party components of this work must be honored. Instructors +# * interested in reusing these course materials should contact the +# * author. +# */ + +import tensorflow as tf + +def weight_variable(shape): + initial = tf.truncated_normal(shape, stddev=0.1) + return tf.Variable(initial) + +def bias_variable(shape): + initial = tf.constant(0.1, shape=shape) + return tf.Variable(initial) + +def conv2d(x, W): + return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + +def max_pool(x, s1, s2): + return tf.nn.max_pool(x, ksize=[1, s1, s2, 1], strides=[1, s1, s2, 1], padding='SAME') + +def seq_nn(batch_X_x,batch_X_y,batch_X_w,batch_X_h,batch_X_a,batch_mask_1,batch_mask_2,batch_Y,max_length,feature_size,keep_prob): + # conv5_x + W_conv1_1_x = weight_variable([1, 5, 3, 62]) + b_conv1_1_x = bias_variable([62]) + + h_concat1_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv1_1_x = tf.nn.relu(conv2d(h_concat1_1_x, W_conv1_1_x) + b_conv1_1_x) + h_pool1_1_x = max_pool(h_conv1_1_x, 1, 2) + mask_pool1_1 = max_pool(batch_mask_1, 1, 2) + + W_conv1_2_x = weight_variable([1, 5, 64, 126]) + b_conv1_2_x = bias_variable([126]) + + h_concat1_2_x = tf.concat([h_pool1_1_x, mask_pool1_1], 3) + h_conv1_2_x = tf.nn.relu(conv2d(h_concat1_2_x, W_conv1_2_x) + b_conv1_2_x) + h_pool1_2_x = max_pool(h_conv1_2_x, 1, 2) + mask_pool1_2 = max_pool(mask_pool1_1, 1, 2) + + W_conv1_3_x = weight_variable([1, 5, 128, 128]) + b_conv1_3_x = bias_variable([128]) + + h_concat1_3_x = tf.concat([h_pool1_2_x, mask_pool1_2], 3) + h_conv1_3_x = tf.nn.relu(conv2d(h_concat1_3_x, W_conv1_3_x) + b_conv1_3_x) + h_pool1_3_x = max_pool(h_conv1_3_x, 1, 2) + + h_pool_flat1_x = tf.reshape(h_pool1_3_x, [-1, 1*8*128]) + + # conv5_y + W_conv1_1_y = weight_variable([1, 5, 3, 62]) + b_conv1_1_y = bias_variable([62]) + + h_concat1_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv1_1_y = tf.nn.relu(conv2d(h_concat1_1_y, W_conv1_1_y) + b_conv1_1_y) + h_pool1_1_y = max_pool(h_conv1_1_y, 1, 2) + + W_conv1_2_y = weight_variable([1, 5, 64, 126]) + b_conv1_2_y = bias_variable([126]) + + h_concat1_2_y = tf.concat([h_pool1_1_y, mask_pool1_1], 3) + h_conv1_2_y = tf.nn.relu(conv2d(h_concat1_2_y, W_conv1_2_y) + b_conv1_2_y) + h_pool1_2_y = max_pool(h_conv1_2_y, 1, 2) + + W_conv1_3_y = weight_variable([1, 5, 128, 128]) + b_conv1_3_y = bias_variable([128]) + + h_concat1_3_y = tf.concat([h_pool1_2_y, mask_pool1_2], 3) + h_conv1_3_y = tf.nn.relu(conv2d(h_concat1_3_y, W_conv1_3_y) + b_conv1_3_y) + h_pool1_3_y = max_pool(h_conv1_3_y, 1, 2) + + h_pool_flat1_y = tf.reshape(h_pool1_3_y, [-1, 1*8*128]) + + # conv5_w + W_conv1_1_w = weight_variable([1, 5, 3, 62]) + b_conv1_1_w = bias_variable([62]) + + h_concat1_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv1_1_w = tf.nn.relu(conv2d(h_concat1_1_w, W_conv1_1_w) + b_conv1_1_w) + h_pool1_1_w = max_pool(h_conv1_1_w, 1, 2) + + W_conv1_2_w = weight_variable([1, 5, 64, 126]) + b_conv1_2_w = bias_variable([126]) + + h_concat1_2_w = tf.concat([h_pool1_1_w, mask_pool1_1], 3) + h_conv1_2_w = tf.nn.relu(conv2d(h_concat1_2_w, W_conv1_2_w) + b_conv1_2_w) + h_pool1_2_w = max_pool(h_conv1_2_w, 1, 2) + + W_conv1_3_w = weight_variable([1, 5, 128, 128]) + b_conv1_3_w = bias_variable([128]) + + h_concat1_3_w = tf.concat([h_pool1_2_w, mask_pool1_2], 3) + h_conv1_3_w = tf.nn.relu(conv2d(h_concat1_3_w, W_conv1_3_w) + b_conv1_3_w) + h_pool1_3_w = 
max_pool(h_conv1_3_w, 1, 2) + + h_pool_flat1_w = tf.reshape(h_pool1_3_w, [-1, 1*8*128]) + + # conv5_h + W_conv1_1_h = weight_variable([1, 5, 3, 62]) + b_conv1_1_h = bias_variable([62]) + + h_concat1_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv1_1_h = tf.nn.relu(conv2d(h_concat1_1_h, W_conv1_1_h) + b_conv1_1_h) + h_pool1_1_h = max_pool(h_conv1_1_h, 1, 2) + + W_conv1_2_h = weight_variable([1, 5, 64, 126]) + b_conv1_2_h = bias_variable([126]) + + h_concat1_2_h = tf.concat([h_pool1_1_h, mask_pool1_1], 3) + h_conv1_2_h = tf.nn.relu(conv2d(h_concat1_2_h, W_conv1_2_h) + b_conv1_2_h) + h_pool1_2_h = max_pool(h_conv1_2_h, 1, 2) + + W_conv1_3_h = weight_variable([1, 5, 128, 128]) + b_conv1_3_h = bias_variable([128]) + + h_concat1_3_h = tf.concat([h_pool1_2_h, mask_pool1_2], 3) + h_conv1_3_h = tf.nn.relu(conv2d(h_concat1_3_h, W_conv1_3_h) + b_conv1_3_h) + h_pool1_3_h = max_pool(h_conv1_3_h, 1, 2) + + h_pool_flat1_h = tf.reshape(h_pool1_3_h, [-1, 1*8*128]) + + # conv5_appear + W_conv1_1_a = weight_variable([1, 5, 3, 62]) + b_conv1_1_a = bias_variable([62]) + + h_concat1_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv1_1_a = tf.nn.relu(conv2d(h_concat1_1_a, W_conv1_1_a) + b_conv1_1_a) + h_pool1_1_a = max_pool(h_conv1_1_a, 1, 2) + mask_pool1_1_a = max_pool(batch_mask_2, 1, 2) + + W_conv1_2_a = weight_variable([1, 5, 64, 126]) + b_conv1_2_a = bias_variable([126]) + + h_concat1_2_a = tf.concat([h_pool1_1_a, mask_pool1_1_a], 3) + h_conv1_2_a = tf.nn.relu(conv2d(h_concat1_2_a, W_conv1_2_a) + b_conv1_2_a) + h_pool1_2_a = max_pool(h_conv1_2_a, 1, 2) + mask_pool1_2_a = max_pool(mask_pool1_1_a, 1, 2) + + W_conv1_3_a = weight_variable([1, 5, 128, 128]) + b_conv1_3_a = bias_variable([128]) + + h_concat1_3_a = tf.concat([h_pool1_2_a, mask_pool1_2_a], 3) + h_conv1_3_a = tf.nn.relu(conv2d(h_concat1_3_a, W_conv1_3_a) + b_conv1_3_a) + h_pool1_3_a = max_pool(h_conv1_3_a, 1, 2) + #h_pool1_3_a = max_pool(h_pool1_3_a, feature_size-4, 1) + h_pool1_3_a = tf.reduce_mean(h_pool1_3_a, axis=1) + + h_pool_flat1_a = tf.reshape(h_pool1_3_a, [-1, 1*8*128]) + + # conv9_x + W_conv2_1_x = weight_variable([1, 9, 3, 62]) + b_conv2_1_x = bias_variable([62]) + + h_concat2_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv2_1_x = tf.nn.relu(conv2d(h_concat2_1_x, W_conv2_1_x) + b_conv2_1_x) + h_pool2_1_x = max_pool(h_conv2_1_x, 1, 2) + mask_pool2_1 = max_pool(batch_mask_1, 1, 2) + + W_conv2_2_x = weight_variable([1, 9, 64, 126]) + b_conv2_2_x = bias_variable([126]) + + h_concat2_2_x = tf.concat([h_pool2_1_x, mask_pool2_1], 3) + h_conv2_2_x = tf.nn.relu(conv2d(h_concat2_2_x, W_conv2_2_x) + b_conv2_2_x) + h_pool2_2_x = max_pool(h_conv2_2_x, 1, 2) + mask_pool2_2 = max_pool(mask_pool2_1, 1, 2) + + W_conv2_3_x = weight_variable([1, 9, 128, 128]) + b_conv2_3_x = bias_variable([128]) + + h_concat2_3_x = tf.concat([h_pool2_2_x, mask_pool2_2], 3) + h_conv2_3_x = tf.nn.relu(conv2d(h_concat2_3_x, W_conv2_3_x) + b_conv2_3_x) + h_pool2_3_x = max_pool(h_conv2_3_x, 1, 2) + + h_pool_flat2_x = tf.reshape(h_pool2_3_x, [-1, 1*8*128]) + + # conv9_y + W_conv2_1_y = weight_variable([1, 9, 3, 62]) + b_conv2_1_y = bias_variable([62]) + + h_concat2_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv2_1_y = tf.nn.relu(conv2d(h_concat2_1_y, W_conv2_1_y) + b_conv2_1_y) + h_pool2_1_y = max_pool(h_conv2_1_y, 1, 2) + + W_conv2_2_y = weight_variable([1, 9, 64, 126]) + b_conv2_2_y = bias_variable([126]) + + h_concat2_2_y = tf.concat([h_pool2_1_y, mask_pool2_1], 3) + h_conv2_2_y = tf.nn.relu(conv2d(h_concat2_2_y, W_conv2_2_y) + b_conv2_2_y) + 
h_pool2_2_y = max_pool(h_conv2_2_y, 1, 2) + + W_conv2_3_y = weight_variable([1, 9, 128, 128]) + b_conv2_3_y = bias_variable([128]) + + h_concat2_3_y = tf.concat([h_pool2_2_y, mask_pool2_2], 3) + h_conv2_3_y = tf.nn.relu(conv2d(h_concat2_3_y, W_conv2_3_y) + b_conv2_3_y) + h_pool2_3_y = max_pool(h_conv2_3_y, 1, 2) + + h_pool_flat2_y = tf.reshape(h_pool2_3_y, [-1, 1*8*128]) + + # conv9_w + W_conv2_1_w = weight_variable([1, 9, 3, 62]) + b_conv2_1_w = bias_variable([62]) + + h_concat2_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv2_1_w = tf.nn.relu(conv2d(h_concat2_1_w, W_conv2_1_w) + b_conv2_1_w) + h_pool2_1_w = max_pool(h_conv2_1_w, 1, 2) + + W_conv2_2_w = weight_variable([1, 9, 64, 126]) + b_conv2_2_w = bias_variable([126]) + + h_concat2_2_w = tf.concat([h_pool2_1_w, mask_pool2_1], 3) + h_conv2_2_w = tf.nn.relu(conv2d(h_concat2_2_w, W_conv2_2_w) + b_conv2_2_w) + h_pool2_2_w = max_pool(h_conv2_2_w, 1, 2) + + W_conv2_3_w = weight_variable([1, 9, 128, 128]) + b_conv2_3_w = bias_variable([128]) + + h_concat2_3_w = tf.concat([h_pool2_2_w, mask_pool2_2], 3) + h_conv2_3_w = tf.nn.relu(conv2d(h_concat2_3_w, W_conv2_3_w) + b_conv2_3_w) + h_pool2_3_w = max_pool(h_conv2_3_w, 1, 2) + + h_pool_flat2_w = tf.reshape(h_pool2_3_w, [-1, 1*8*128]) + + # conv9_h + W_conv2_1_h = weight_variable([1, 9, 3, 62]) + b_conv2_1_h = bias_variable([62]) + + h_concat2_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv2_1_h = tf.nn.relu(conv2d(h_concat2_1_h, W_conv2_1_h) + b_conv2_1_h) + h_pool2_1_h = max_pool(h_conv2_1_h, 1, 2) + + W_conv2_2_h = weight_variable([1, 9, 64, 126]) + b_conv2_2_h = bias_variable([126]) + + h_concat2_2_h = tf.concat([h_pool2_1_h, mask_pool2_1], 3) + h_conv2_2_h = tf.nn.relu(conv2d(h_concat2_2_h, W_conv2_2_h) + b_conv2_2_h) + h_pool2_2_h = max_pool(h_conv2_2_h, 1, 2) + + W_conv2_3_h = weight_variable([1, 9, 128, 128]) + b_conv2_3_h = bias_variable([128]) + + h_concat2_3_h = tf.concat([h_pool2_2_h, mask_pool2_2], 3) + h_conv2_3_h = tf.nn.relu(conv2d(h_concat2_3_h, W_conv2_3_h) + b_conv2_3_h) + h_pool2_3_h = max_pool(h_conv2_3_h, 1, 2) + + h_pool_flat2_h = tf.reshape(h_pool2_3_h, [-1, 1*8*128]) + + # conv9_appear + W_conv2_1_a = weight_variable([1, 9, 3, 62]) + b_conv2_1_a = bias_variable([62]) + + h_concat2_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv2_1_a = tf.nn.relu(conv2d(h_concat2_1_a, W_conv2_1_a) + b_conv2_1_a) + h_pool2_1_a = max_pool(h_conv2_1_a, 1, 2) + mask_pool2_1_a = max_pool(batch_mask_2, 1, 2) + + W_conv2_2_a = weight_variable([1, 9, 64, 126]) + b_conv2_2_a = bias_variable([126]) + + h_concat2_2_a = tf.concat([h_pool2_1_a, mask_pool2_1_a], 3) + h_conv2_2_a = tf.nn.relu(conv2d(h_concat2_2_a, W_conv2_2_a) + b_conv2_2_a) + h_pool2_2_a = max_pool(h_conv2_2_a, 1, 2) + mask_pool2_2_a = max_pool(mask_pool2_1_a, 1, 2) + + W_conv2_3_a = weight_variable([1, 9, 128, 128]) + b_conv2_3_a = bias_variable([128]) + + h_concat2_3_a = tf.concat([h_pool2_2_a, mask_pool2_2_a], 3) + h_conv2_3_a = tf.nn.relu(conv2d(h_concat2_3_a, W_conv2_3_a) + b_conv2_3_a) + h_pool2_3_a = max_pool(h_conv2_3_a, 1, 2) + #h_pool2_3_a = max_pool(h_pool2_3_a, feature_size-4, 1) + h_pool2_3_a = tf.reduce_mean(h_pool2_3_a, axis=1) + + h_pool_flat2_a = tf.reshape(h_pool2_3_a, [-1, 1*8*128]) + + # conv13_x + W_conv3_1_x = weight_variable([1, 13, 3, 62]) + b_conv3_1_x = bias_variable([62]) + + h_concat3_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv3_1_x = tf.nn.relu(conv2d(h_concat3_1_x, W_conv3_1_x) + b_conv3_1_x) + h_pool3_1_x = max_pool(h_conv3_1_x, 1, 2) + mask_pool3_1 = max_pool(batch_mask_1, 
1, 2) + + W_conv3_2_x = weight_variable([1, 13, 64, 126]) + b_conv3_2_x = bias_variable([126]) + + h_concat3_2_x = tf.concat([h_pool3_1_x, mask_pool3_1], 3) + h_conv3_2_x = tf.nn.relu(conv2d(h_concat3_2_x, W_conv3_2_x) + b_conv3_2_x) + h_pool3_2_x = max_pool(h_conv3_2_x, 1, 2) + mask_pool3_2 = max_pool(mask_pool3_1, 1, 2) + + W_conv3_3_x = weight_variable([1, 13, 128, 128]) + b_conv3_3_x = bias_variable([128]) + + h_concat3_3_x = tf.concat([h_pool3_2_x, mask_pool3_2], 3) + h_conv3_3_x = tf.nn.relu(conv2d(h_concat3_3_x, W_conv3_3_x) + b_conv3_3_x) + h_pool3_3_x = max_pool(h_conv3_3_x, 1, 2) + + h_pool_flat3_x = tf.reshape(h_pool3_3_x, [-1, 1*8*128]) + + # conv13_y + W_conv3_1_y = weight_variable([1, 13, 3, 62]) + b_conv3_1_y = bias_variable([62]) + + h_concat3_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv3_1_y = tf.nn.relu(conv2d(h_concat3_1_y, W_conv3_1_y) + b_conv3_1_y) + h_pool3_1_y = max_pool(h_conv3_1_y, 1, 2) + + W_conv3_2_y = weight_variable([1, 13, 64, 126]) + b_conv3_2_y = bias_variable([126]) + + h_concat3_2_y = tf.concat([h_pool3_1_y, mask_pool3_1], 3) + h_conv3_2_y = tf.nn.relu(conv2d(h_concat3_2_y, W_conv3_2_y) + b_conv3_2_y) + h_pool3_2_y = max_pool(h_conv3_2_y, 1, 2) + + W_conv3_3_y = weight_variable([1, 13, 128, 128]) + b_conv3_3_y = bias_variable([128]) + + h_concat3_3_y = tf.concat([h_pool3_2_y, mask_pool3_2], 3) + h_conv3_3_y = tf.nn.relu(conv2d(h_concat3_3_y, W_conv3_3_y) + b_conv3_3_y) + h_pool3_3_y = max_pool(h_conv3_3_y, 1, 2) + + h_pool_flat3_y = tf.reshape(h_pool3_3_y, [-1, 1*8*128]) + + # conv13_w + W_conv3_1_w = weight_variable([1, 13, 3, 62]) + b_conv3_1_w = bias_variable([62]) + + h_concat3_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv3_1_w = tf.nn.relu(conv2d(h_concat3_1_w, W_conv3_1_w) + b_conv3_1_w) + h_pool3_1_w = max_pool(h_conv3_1_w, 1, 2) + + W_conv3_2_w = weight_variable([1, 13, 64, 126]) + b_conv3_2_w = bias_variable([126]) + + h_concat3_2_w = tf.concat([h_pool3_1_w, mask_pool3_1], 3) + h_conv3_2_w = tf.nn.relu(conv2d(h_concat3_2_w, W_conv3_2_w) + b_conv3_2_w) + h_pool3_2_w = max_pool(h_conv3_2_w, 1, 2) + + W_conv3_3_w = weight_variable([1, 13, 128, 128]) + b_conv3_3_w = bias_variable([128]) + + h_concat3_3_w = tf.concat([h_pool3_2_w, mask_pool3_2], 3) + h_conv3_3_w = tf.nn.relu(conv2d(h_concat3_3_w, W_conv3_3_w) + b_conv3_3_w) + h_pool3_3_w = max_pool(h_conv3_3_w, 1, 2) + + h_pool_flat3_w = tf.reshape(h_pool3_3_w, [-1, 1*8*128]) + + # conv13_h + W_conv3_1_h = weight_variable([1, 13, 3, 62]) + b_conv3_1_h = bias_variable([62]) + + h_concat3_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv3_1_h = tf.nn.relu(conv2d(h_concat3_1_h, W_conv3_1_h) + b_conv3_1_h) + h_pool3_1_h = max_pool(h_conv3_1_h, 1, 2) + + W_conv3_2_h = weight_variable([1, 13, 64, 126]) + b_conv3_2_h = bias_variable([126]) + + h_concat3_2_h = tf.concat([h_pool3_1_h, mask_pool3_1], 3) + h_conv3_2_h = tf.nn.relu(conv2d(h_concat3_2_h, W_conv3_2_h) + b_conv3_2_h) + h_pool3_2_h = max_pool(h_conv3_2_h, 1, 2) + + W_conv3_3_h = weight_variable([1, 13, 128, 128]) + b_conv3_3_h = bias_variable([128]) + + h_concat3_3_h = tf.concat([h_pool3_2_h, mask_pool3_2], 3) + h_conv3_3_h = tf.nn.relu(conv2d(h_concat3_3_h, W_conv3_3_h) + b_conv3_3_h) + h_pool3_3_h = max_pool(h_conv3_3_h, 1, 2) + + h_pool_flat3_h = tf.reshape(h_pool3_3_h, [-1, 1*8*128]) + + # conv13_appear + W_conv3_1_a = weight_variable([1, 13, 3, 62]) + b_conv3_1_a = bias_variable([62]) + + h_concat3_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv3_1_a = tf.nn.relu(conv2d(h_concat3_1_a, W_conv3_1_a) + b_conv3_1_a) + 
h_pool3_1_a = max_pool(h_conv3_1_a, 1, 2) + mask_pool3_1_a = max_pool(batch_mask_2, 1, 2) + + W_conv3_2_a = weight_variable([1, 13, 64, 126]) + b_conv3_2_a = bias_variable([126]) + + h_concat3_2_a = tf.concat([h_pool3_1_a, mask_pool3_1_a], 3) + h_conv3_2_a = tf.nn.relu(conv2d(h_concat3_2_a, W_conv3_2_a) + b_conv3_2_a) + h_pool3_2_a = max_pool(h_conv3_2_a, 1, 2) + mask_pool3_2_a = max_pool(mask_pool3_1_a, 1, 2) + + W_conv3_3_a = weight_variable([1, 13, 128, 128]) + b_conv3_3_a = bias_variable([128]) + + h_concat3_3_a = tf.concat([h_pool3_2_a, mask_pool3_2_a], 3) + h_conv3_3_a = tf.nn.relu(conv2d(h_concat3_3_a, W_conv3_3_a) + b_conv3_3_a) + h_pool3_3_a = max_pool(h_conv3_3_a, 1, 2) + #h_pool3_3_a = max_pool(h_pool3_3_a, feature_size-4, 1) + h_pool3_3_a = tf.reduce_mean(h_pool3_3_a, axis=1) + + h_pool_flat3_a = tf.reshape(h_pool3_3_a, [-1, 1*8*128]) + + + # conv3_x + W_conv4_1_x = weight_variable([1, 13, 3, 62]) + b_conv4_1_x = bias_variable([62]) + + h_concat4_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv4_1_x = tf.nn.relu(conv2d(h_concat4_1_x, W_conv4_1_x) + b_conv4_1_x) + h_pool4_1_x = max_pool(h_conv4_1_x, 1, 2) + mask_pool4_1 = max_pool(batch_mask_1, 1, 2) + + W_conv4_2_x = weight_variable([1, 13, 64, 126]) + b_conv4_2_x = bias_variable([126]) + + h_concat4_2_x = tf.concat([h_pool4_1_x, mask_pool4_1], 3) + h_conv4_2_x = tf.nn.relu(conv2d(h_concat4_2_x, W_conv4_2_x) + b_conv4_2_x) + h_pool4_2_x = max_pool(h_conv4_2_x, 1, 2) + mask_pool4_2 = max_pool(mask_pool4_1, 1, 2) + + W_conv4_3_x = weight_variable([1, 13, 128, 128]) + b_conv4_3_x = bias_variable([128]) + + h_concat4_3_x = tf.concat([h_pool4_2_x, mask_pool4_2], 3) + h_conv4_3_x = tf.nn.relu(conv2d(h_concat4_3_x, W_conv4_3_x) + b_conv4_3_x) + h_pool4_3_x = max_pool(h_conv4_3_x, 1, 2) + + h_pool_flat4_x = tf.reshape(h_pool4_3_x, [-1, 1*8*128]) + + # conv3_y + W_conv4_1_y = weight_variable([1, 13, 3, 62]) + b_conv4_1_y = bias_variable([62]) + + h_concat4_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv4_1_y = tf.nn.relu(conv2d(h_concat4_1_y, W_conv4_1_y) + b_conv4_1_y) + h_pool4_1_y = max_pool(h_conv4_1_y, 1, 2) + + W_conv4_2_y = weight_variable([1, 13, 64, 126]) + b_conv4_2_y = bias_variable([126]) + + h_concat4_2_y = tf.concat([h_pool4_1_y, mask_pool4_1], 3) + h_conv4_2_y = tf.nn.relu(conv2d(h_concat4_2_y, W_conv4_2_y) + b_conv4_2_y) + h_pool4_2_y = max_pool(h_conv4_2_y, 1, 2) + + W_conv4_3_y = weight_variable([1, 13, 128, 128]) + b_conv4_3_y = bias_variable([128]) + + h_concat4_3_y = tf.concat([h_pool4_2_y, mask_pool4_2], 3) + h_conv4_3_y = tf.nn.relu(conv2d(h_concat4_3_y, W_conv4_3_y) + b_conv4_3_y) + h_pool4_3_y = max_pool(h_conv4_3_y, 1, 2) + + h_pool_flat4_y = tf.reshape(h_pool4_3_y, [-1, 1*8*128]) + + # conv3_w + W_conv4_1_w = weight_variable([1, 13, 3, 62]) + b_conv4_1_w = bias_variable([62]) + + h_concat4_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv4_1_w = tf.nn.relu(conv2d(h_concat4_1_w, W_conv4_1_w) + b_conv4_1_w) + h_pool4_1_w = max_pool(h_conv4_1_w, 1, 2) + + W_conv4_2_w = weight_variable([1, 13, 64, 126]) + b_conv4_2_w = bias_variable([126]) + + h_concat4_2_w = tf.concat([h_pool4_1_w, mask_pool4_1], 3) + h_conv4_2_w = tf.nn.relu(conv2d(h_concat4_2_w, W_conv4_2_w) + b_conv4_2_w) + h_pool4_2_w = max_pool(h_conv4_2_w, 1, 2) + + W_conv4_3_w = weight_variable([1, 13, 128, 128]) + b_conv4_3_w = bias_variable([128]) + + h_concat4_3_w = tf.concat([h_pool4_2_w, mask_pool4_2], 3) + h_conv4_3_w = tf.nn.relu(conv2d(h_concat4_3_w, W_conv4_3_w) + b_conv4_3_w) + h_pool4_3_w = max_pool(h_conv4_3_w, 1, 2) + + 
h_pool_flat4_w = tf.reshape(h_pool4_3_w, [-1, 1*8*128]) + + # conv13_h + W_conv4_1_h = weight_variable([1, 13, 3, 62]) + b_conv4_1_h = bias_variable([62]) + + h_concat4_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv4_1_h = tf.nn.relu(conv2d(h_concat4_1_h, W_conv4_1_h) + b_conv4_1_h) + h_pool4_1_h = max_pool(h_conv4_1_h, 1, 2) + + W_conv4_2_h = weight_variable([1, 13, 64, 126]) + b_conv4_2_h = bias_variable([126]) + + h_concat4_2_h = tf.concat([h_pool4_1_h, mask_pool4_1], 3) + h_conv4_2_h = tf.nn.relu(conv2d(h_concat4_2_h, W_conv4_2_h) + b_conv4_2_h) + h_pool4_2_h = max_pool(h_conv4_2_h, 1, 2) + + W_conv4_3_h = weight_variable([1, 13, 128, 128]) + b_conv4_3_h = bias_variable([128]) + + h_concat4_3_h = tf.concat([h_pool4_2_h, mask_pool4_2], 3) + h_conv4_3_h = tf.nn.relu(conv2d(h_concat4_3_h, W_conv4_3_h) + b_conv4_3_h) + h_pool4_3_h = max_pool(h_conv4_3_h, 1, 2) + + h_pool_flat4_h = tf.reshape(h_pool4_3_h, [-1, 1*8*128]) + + # conv3_appear + W_conv4_1_a = weight_variable([1, 13, 3, 62]) + b_conv4_1_a = bias_variable([62]) + + h_concat4_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv4_1_a = tf.nn.relu(conv2d(h_concat4_1_a, W_conv4_1_a) + b_conv4_1_a) + h_pool4_1_a = max_pool(h_conv4_1_a, 1, 2) + mask_pool4_1_a = max_pool(batch_mask_2, 1, 2) + + W_conv4_2_a = weight_variable([1, 13, 64, 126]) + b_conv4_2_a = bias_variable([126]) + + h_concat4_2_a = tf.concat([h_pool4_1_a, mask_pool4_1_a], 3) + h_conv4_2_a = tf.nn.relu(conv2d(h_concat4_2_a, W_conv4_2_a) + b_conv4_2_a) + h_pool4_2_a = max_pool(h_conv4_2_a, 1, 2) + mask_pool4_2_a = max_pool(mask_pool4_1_a, 1, 2) + + W_conv4_3_a = weight_variable([1, 13, 128, 128]) + b_conv4_3_a = bias_variable([128]) + + h_concat4_3_a = tf.concat([h_pool4_2_a, mask_pool4_2_a], 3) + h_conv4_3_a = tf.nn.relu(conv2d(h_concat4_3_a, W_conv4_3_a) + b_conv4_3_a) + h_pool4_3_a = max_pool(h_conv4_3_a, 1, 2) + #h_pool4_3_a = max_pool(h_pool4_3_a, feature_size-4, 1) + h_pool4_3_a = tf.reduce_mean(h_pool4_3_a, axis=1) + + h_pool_flat4_a = tf.reshape(h_pool4_3_a, [-1, 1*8*128]) + + + # fc + h_pool_flat = tf.concat([h_pool_flat1_x, h_pool_flat1_y, h_pool_flat1_w, h_pool_flat1_h, h_pool_flat1_a, + h_pool_flat2_x, h_pool_flat2_y, h_pool_flat2_w, h_pool_flat2_h, h_pool_flat2_a, + h_pool_flat3_x, h_pool_flat3_y, h_pool_flat3_w, h_pool_flat3_h, h_pool_flat3_a, + h_pool_flat4_x, h_pool_flat4_y, h_pool_flat4_w, h_pool_flat4_h, h_pool_flat4_a], 1) + + W_fc1 = weight_variable([20 * 8 * 128, 1024]) + b_fc1 = bias_variable([1024]) + + h_fc1 = tf.nn.relu(tf.matmul(h_pool_flat, W_fc1) + b_fc1) + + h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) + + W_fc2 = weight_variable([1024, 2]) + b_fc2 = bias_variable([2]) + + y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 + + return y_conv diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/seq_nn_3d_v2.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/seq_nn_3d_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..f5df91b9a32ae37054a919c72c7716092a481781 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/seq_nn_3d_v2.py @@ -0,0 +1,582 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# /* +# * Copyright ©2019 Gaoang Wang. All rights reserved. Permission is +# * hereby granted for academic use. No other use, copying, distribution, or modification +# * is permitted without prior written consent. Copyrights for +# * third-party components of this work must be honored. Instructors +# * interested in reusing these course materials should contact the +# * author. +# */ +# +import tensorflow as tf + +def weight_variable(shape): + initial = tf.truncated_normal(shape, stddev=0.1) + return tf.Variable(initial) + +def bias_variable(shape): + initial = tf.constant(0.1, shape=shape) + return tf.Variable(initial) + +def conv2d(x, W): + return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + +def max_pool(x, s1, s2): + return tf.nn.max_pool(x, ksize=[1, s1, s2, 1], strides=[1, s1, s2, 1], padding='SAME') + +def seq_nn(batch_X_x,batch_X_y,batch_X_w,batch_X_h,batch_X_a,batch_mask_1,batch_mask_2,batch_Y,max_length,feature_size,keep_prob): + # conv1_x + W_conv4_1_x = weight_variable([1, 3, 3, 16]) + b_conv4_1_x = bias_variable([16]) + + h_concat4_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv4_1_x = tf.nn.relu(conv2d(h_concat4_1_x, W_conv4_1_x) + b_conv4_1_x) + h_pool4_1_x = max_pool(h_conv4_1_x, 1, 2) + + W_conv1_1_x = weight_variable([1, 5, 3, 16]) + b_conv1_1_x = bias_variable([16]) + + h_concat1_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv1_1_x = tf.nn.relu(conv2d(h_concat1_1_x, W_conv1_1_x) + b_conv1_1_x) + h_pool1_1_x = max_pool(h_conv1_1_x, 1, 2) + + W_conv2_1_x = weight_variable([1, 9, 3, 16]) + b_conv2_1_x = bias_variable([16]) + + h_concat2_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv2_1_x = tf.nn.relu(conv2d(h_concat2_1_x, W_conv2_1_x) + b_conv2_1_x) + h_pool2_1_x = max_pool(h_conv2_1_x, 1, 2) + + W_conv3_1_x = weight_variable([1, 13, 3, 16]) + b_conv3_1_x = bias_variable([16]) + + h_concat3_1_x = tf.concat([batch_X_x, batch_mask_1], 3) + h_conv3_1_x = tf.nn.relu(conv2d(h_concat3_1_x, W_conv3_1_x) + b_conv3_1_x) + h_pool3_1_x = max_pool(h_conv3_1_x, 1, 2) + + mask_pool4_1 = max_pool(batch_mask_1, 1, 2) + + conv1_x = tf.concat([h_pool4_1_x, h_pool1_1_x], 3) + conv1_x = tf.concat([conv1_x, h_pool2_1_x], 3) + conv1_x = tf.concat([conv1_x, h_pool3_1_x], 3) + conv1_x = tf.concat([conv1_x, mask_pool4_1], 3) + + # conv2_x + W_conv4_2_x = weight_variable([1, 3, 66, 64]) + b_conv4_2_x = bias_variable([64]) + + h_conv4_2_x = tf.nn.relu(conv2d(conv1_x, W_conv4_2_x) + 
b_conv4_2_x) + h_pool4_2_x = max_pool(h_conv4_2_x, 1, 2) + + W_conv1_2_x = weight_variable([1, 5, 66, 64]) + b_conv1_2_x = bias_variable([64]) + + h_conv1_2_x = tf.nn.relu(conv2d(conv1_x, W_conv1_2_x) + b_conv1_2_x) + h_pool1_2_x = max_pool(h_conv1_2_x, 1, 2) + + W_conv2_2_x = weight_variable([1, 9, 66, 64]) + b_conv2_2_x = bias_variable([64]) + + h_conv2_2_x = tf.nn.relu(conv2d(conv1_x, W_conv2_2_x) + b_conv2_2_x) + h_pool2_2_x = max_pool(h_conv2_2_x, 1, 2) + + W_conv3_2_x = weight_variable([1, 13, 66, 64]) + b_conv3_2_x = bias_variable([64]) + + h_conv3_2_x = tf.nn.relu(conv2d(conv1_x, W_conv3_2_x) + b_conv3_2_x) + h_pool3_2_x = max_pool(h_conv3_2_x, 1, 2) + + mask_pool4_2 = max_pool(mask_pool4_1, 1, 2) + + conv2_x = tf.concat([h_pool4_2_x, h_pool1_2_x], 3) + conv2_x = tf.concat([conv2_x, h_pool2_2_x], 3) + conv2_x = tf.concat([conv2_x, h_pool3_2_x], 3) + conv2_x = tf.concat([conv2_x, mask_pool4_2], 3) + + # conv3_x + W_conv4_3_x = weight_variable([1, 3, 258, 128]) + b_conv4_3_x = bias_variable([128]) + + h_conv4_3_x = tf.nn.relu(conv2d(conv2_x, W_conv4_3_x) + b_conv4_3_x) + h_pool4_3_x = max_pool(h_conv4_3_x, 1, 2) + + h_pool_flat4_x = tf.reshape(h_pool4_3_x, [-1, 1*8*128]) + + W_conv1_3_x = weight_variable([1, 5, 258, 128]) + b_conv1_3_x = bias_variable([128]) + + h_conv1_3_x = tf.nn.relu(conv2d(conv2_x, W_conv1_3_x) + b_conv1_3_x) + h_pool1_3_x = max_pool(h_conv1_3_x, 1, 2) + + h_pool_flat1_x = tf.reshape(h_pool1_3_x, [-1, 1*8*128]) + + W_conv2_3_x = weight_variable([1, 9, 258, 128]) + b_conv2_3_x = bias_variable([128]) + + h_conv2_3_x = tf.nn.relu(conv2d(conv2_x, W_conv2_3_x) + b_conv2_3_x) + h_pool2_3_x = max_pool(h_conv2_3_x, 1, 2) + + h_pool_flat2_x = tf.reshape(h_pool2_3_x, [-1, 1*8*128]) + + W_conv3_3_x = weight_variable([1, 13, 258, 128]) + b_conv3_3_x = bias_variable([128]) + + h_conv3_3_x = tf.nn.relu(conv2d(conv2_x, W_conv3_3_x) + b_conv3_3_x) + h_pool3_3_x = max_pool(h_conv3_3_x, 1, 2) + + h_pool_flat3_x = tf.reshape(h_pool3_3_x, [-1, 1*8*128]) + + + + # conv1_y + W_conv4_1_y = weight_variable([1, 3, 3, 16]) + b_conv4_1_y = bias_variable([16]) + + h_concat4_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv4_1_y = tf.nn.relu(conv2d(h_concat4_1_y, W_conv4_1_y) + b_conv4_1_y) + h_pool4_1_y = max_pool(h_conv4_1_y, 1, 2) + + W_conv1_1_y = weight_variable([1, 5, 3, 16]) + b_conv1_1_y = bias_variable([16]) + + h_concat1_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv1_1_y = tf.nn.relu(conv2d(h_concat1_1_y, W_conv1_1_y) + b_conv1_1_y) + h_pool1_1_y = max_pool(h_conv1_1_y, 1, 2) + + W_conv2_1_y = weight_variable([1, 9, 3, 16]) + b_conv2_1_y = bias_variable([16]) + + h_concat2_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv2_1_y = tf.nn.relu(conv2d(h_concat2_1_y, W_conv2_1_y) + b_conv2_1_y) + h_pool2_1_y = max_pool(h_conv2_1_y, 1, 2) + + W_conv3_1_y = weight_variable([1, 13, 3, 16]) + b_conv3_1_y = bias_variable([16]) + + h_concat3_1_y = tf.concat([batch_X_y, batch_mask_1], 3) + h_conv3_1_y = tf.nn.relu(conv2d(h_concat3_1_y, W_conv3_1_y) + b_conv3_1_y) + h_pool3_1_y = max_pool(h_conv3_1_y, 1, 2) + + conv1_y = tf.concat([h_pool4_1_y, h_pool1_1_y], 3) + conv1_y = tf.concat([conv1_y, h_pool2_1_y], 3) + conv1_y = tf.concat([conv1_y, h_pool3_1_y], 3) + conv1_y = tf.concat([conv1_y, mask_pool4_1], 3) + + # conv2_y + W_conv4_2_y = weight_variable([1, 3, 66, 64]) + b_conv4_2_y = bias_variable([64]) + + h_conv4_2_y = tf.nn.relu(conv2d(conv1_y, W_conv4_2_y) + b_conv4_2_y) + h_pool4_2_y = max_pool(h_conv4_2_y, 1, 2) + + W_conv1_2_y = weight_variable([1, 5, 66, 64]) + 
b_conv1_2_y = bias_variable([64]) + + h_conv1_2_y = tf.nn.relu(conv2d(conv1_y, W_conv1_2_y) + b_conv1_2_y) + h_pool1_2_y = max_pool(h_conv1_2_y, 1, 2) + + W_conv2_2_y = weight_variable([1, 9, 66, 64]) + b_conv2_2_y = bias_variable([64]) + + h_conv2_2_y = tf.nn.relu(conv2d(conv1_y, W_conv2_2_y) + b_conv2_2_y) + h_pool2_2_y = max_pool(h_conv2_2_y, 1, 2) + + W_conv3_2_y = weight_variable([1, 13, 66, 64]) + b_conv3_2_y = bias_variable([64]) + + h_conv3_2_y = tf.nn.relu(conv2d(conv1_y, W_conv3_2_y) + b_conv3_2_y) + h_pool3_2_y = max_pool(h_conv3_2_y, 1, 2) + + conv2_y = tf.concat([h_pool4_2_y, h_pool1_2_y], 3) + conv2_y = tf.concat([conv2_y, h_pool2_2_y], 3) + conv2_y = tf.concat([conv2_y, h_pool3_2_y], 3) + conv2_y = tf.concat([conv2_y, mask_pool4_2], 3) + + # conv3_y + W_conv4_3_y = weight_variable([1, 3, 258, 128]) + b_conv4_3_y = bias_variable([128]) + + h_conv4_3_y = tf.nn.relu(conv2d(conv2_y, W_conv4_3_y) + b_conv4_3_y) + h_pool4_3_y = max_pool(h_conv4_3_y, 1, 2) + + h_pool_flat4_y = tf.reshape(h_pool4_3_y, [-1, 1*8*128]) + + W_conv1_3_y = weight_variable([1, 5, 258, 128]) + b_conv1_3_y = bias_variable([128]) + + h_conv1_3_y = tf.nn.relu(conv2d(conv2_y, W_conv1_3_y) + b_conv1_3_y) + h_pool1_3_y = max_pool(h_conv1_3_y, 1, 2) + + h_pool_flat1_y = tf.reshape(h_pool1_3_y, [-1, 1*8*128]) + + W_conv2_3_y = weight_variable([1, 9, 258, 128]) + b_conv2_3_y = bias_variable([128]) + + h_conv2_3_y = tf.nn.relu(conv2d(conv2_y, W_conv2_3_y) + b_conv2_3_y) + h_pool2_3_y = max_pool(h_conv2_3_y, 1, 2) + + h_pool_flat2_y = tf.reshape(h_pool2_3_y, [-1, 1*8*128]) + + W_conv3_3_y = weight_variable([1, 13, 258, 128]) + b_conv3_3_y = bias_variable([128]) + + h_conv3_3_y = tf.nn.relu(conv2d(conv2_y, W_conv3_3_y) + b_conv3_3_y) + h_pool3_3_y = max_pool(h_conv3_3_y, 1, 2) + + h_pool_flat3_y = tf.reshape(h_pool3_3_y, [-1, 1*8*128]) + + + # conv1_w + W_conv4_1_w = weight_variable([1, 3, 3, 16]) + b_conv4_1_w = bias_variable([16]) + + h_concat4_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv4_1_w = tf.nn.relu(conv2d(h_concat4_1_w, W_conv4_1_w) + b_conv4_1_w) + h_pool4_1_w = max_pool(h_conv4_1_w, 1, 2) + + W_conv1_1_w = weight_variable([1, 5, 3, 16]) + b_conv1_1_w = bias_variable([16]) + + h_concat1_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv1_1_w = tf.nn.relu(conv2d(h_concat1_1_w, W_conv1_1_w) + b_conv1_1_w) + h_pool1_1_w = max_pool(h_conv1_1_w, 1, 2) + + W_conv2_1_w = weight_variable([1, 9, 3, 16]) + b_conv2_1_w = bias_variable([16]) + + h_concat2_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv2_1_w = tf.nn.relu(conv2d(h_concat2_1_w, W_conv2_1_w) + b_conv2_1_w) + h_pool2_1_w = max_pool(h_conv2_1_w, 1, 2) + + W_conv3_1_w = weight_variable([1, 13, 3, 16]) + b_conv3_1_w = bias_variable([16]) + + h_concat3_1_w = tf.concat([batch_X_w, batch_mask_1], 3) + h_conv3_1_w = tf.nn.relu(conv2d(h_concat3_1_w, W_conv3_1_w) + b_conv3_1_w) + h_pool3_1_w = max_pool(h_conv3_1_w, 1, 2) + + conv1_w = tf.concat([h_pool4_1_w, h_pool1_1_w], 3) + conv1_w = tf.concat([conv1_w, h_pool2_1_w], 3) + conv1_w = tf.concat([conv1_w, h_pool3_1_w], 3) + conv1_w = tf.concat([conv1_w, mask_pool4_1], 3) + + # conv2_w + W_conv4_2_w = weight_variable([1, 3, 66, 64]) + b_conv4_2_w = bias_variable([64]) + + h_conv4_2_w = tf.nn.relu(conv2d(conv1_w, W_conv4_2_w) + b_conv4_2_w) + h_pool4_2_w = max_pool(h_conv4_2_w, 1, 2) + + W_conv1_2_w = weight_variable([1, 5, 66, 64]) + b_conv1_2_w = bias_variable([64]) + + h_conv1_2_w = tf.nn.relu(conv2d(conv1_w, W_conv1_2_w) + b_conv1_2_w) + h_pool1_2_w = max_pool(h_conv1_2_w, 1, 2) + + W_conv2_2_w 
= weight_variable([1, 9, 66, 64]) + b_conv2_2_w = bias_variable([64]) + + h_conv2_2_w = tf.nn.relu(conv2d(conv1_w, W_conv2_2_w) + b_conv2_2_w) + h_pool2_2_w = max_pool(h_conv2_2_w, 1, 2) + + W_conv3_2_w = weight_variable([1, 13, 66, 64]) + b_conv3_2_w = bias_variable([64]) + + h_conv3_2_w = tf.nn.relu(conv2d(conv1_w, W_conv3_2_w) + b_conv3_2_w) + h_pool3_2_w = max_pool(h_conv3_2_w, 1, 2) + + conv2_w = tf.concat([h_pool4_2_w, h_pool1_2_w], 3) + conv2_w = tf.concat([conv2_w, h_pool2_2_w], 3) + conv2_w = tf.concat([conv2_w, h_pool3_2_w], 3) + conv2_w = tf.concat([conv2_w, mask_pool4_2], 3) + + # conv3_w + W_conv4_3_w = weight_variable([1, 3, 258, 128]) + b_conv4_3_w = bias_variable([128]) + + h_conv4_3_w = tf.nn.relu(conv2d(conv2_w, W_conv4_3_w) + b_conv4_3_w) + h_pool4_3_w = max_pool(h_conv4_3_w, 1, 2) + + h_pool_flat4_w = tf.reshape(h_pool4_3_w, [-1, 1*8*128]) + + W_conv1_3_w = weight_variable([1, 5, 258, 128]) + b_conv1_3_w = bias_variable([128]) + + h_conv1_3_w = tf.nn.relu(conv2d(conv2_w, W_conv1_3_w) + b_conv1_3_w) + h_pool1_3_w = max_pool(h_conv1_3_w, 1, 2) + + h_pool_flat1_w = tf.reshape(h_pool1_3_w, [-1, 1*8*128]) + + W_conv2_3_w = weight_variable([1, 9, 258, 128]) + b_conv2_3_w = bias_variable([128]) + + h_conv2_3_w = tf.nn.relu(conv2d(conv2_w, W_conv2_3_w) + b_conv2_3_w) + h_pool2_3_w = max_pool(h_conv2_3_w, 1, 2) + + h_pool_flat2_w = tf.reshape(h_pool2_3_w, [-1, 1*8*128]) + + W_conv3_3_w = weight_variable([1, 13, 258, 128]) + b_conv3_3_w = bias_variable([128]) + + h_conv3_3_w = tf.nn.relu(conv2d(conv2_w, W_conv3_3_w) + b_conv3_3_w) + h_pool3_3_w = max_pool(h_conv3_3_w, 1, 2) + + h_pool_flat3_w = tf.reshape(h_pool3_3_w, [-1, 1*8*128]) + + + # conv1_h + W_conv4_1_h = weight_variable([1, 3, 3, 16]) + b_conv4_1_h = bias_variable([16]) + + h_concat4_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv4_1_h = tf.nn.relu(conv2d(h_concat4_1_h, W_conv4_1_h) + b_conv4_1_h) + h_pool4_1_h = max_pool(h_conv4_1_h, 1, 2) + + W_conv1_1_h = weight_variable([1, 5, 3, 16]) + b_conv1_1_h = bias_variable([16]) + + h_concat1_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv1_1_h = tf.nn.relu(conv2d(h_concat1_1_h, W_conv1_1_h) + b_conv1_1_h) + h_pool1_1_h = max_pool(h_conv1_1_h, 1, 2) + + W_conv2_1_h = weight_variable([1, 9, 3, 16]) + b_conv2_1_h = bias_variable([16]) + + h_concat2_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv2_1_h = tf.nn.relu(conv2d(h_concat2_1_h, W_conv2_1_h) + b_conv2_1_h) + h_pool2_1_h = max_pool(h_conv2_1_h, 1, 2) + + W_conv3_1_h = weight_variable([1, 13, 3, 16]) + b_conv3_1_h = bias_variable([16]) + + h_concat3_1_h = tf.concat([batch_X_h, batch_mask_1], 3) + h_conv3_1_h = tf.nn.relu(conv2d(h_concat3_1_h, W_conv3_1_h) + b_conv3_1_h) + h_pool3_1_h = max_pool(h_conv3_1_h, 1, 2) + + conv1_h = tf.concat([h_pool4_1_h, h_pool1_1_h], 3) + conv1_h = tf.concat([conv1_h, h_pool2_1_h], 3) + conv1_h = tf.concat([conv1_h, h_pool3_1_h], 3) + conv1_h = tf.concat([conv1_h, mask_pool4_1], 3) + + # conv2_h + W_conv4_2_h = weight_variable([1, 3, 66, 64]) + b_conv4_2_h = bias_variable([64]) + + h_conv4_2_h = tf.nn.relu(conv2d(conv1_h, W_conv4_2_h) + b_conv4_2_h) + h_pool4_2_h = max_pool(h_conv4_2_h, 1, 2) + + W_conv1_2_h = weight_variable([1, 5, 66, 64]) + b_conv1_2_h = bias_variable([64]) + + h_conv1_2_h = tf.nn.relu(conv2d(conv1_h, W_conv1_2_h) + b_conv1_2_h) + h_pool1_2_h = max_pool(h_conv1_2_h, 1, 2) + + W_conv2_2_h = weight_variable([1, 9, 66, 64]) + b_conv2_2_h = bias_variable([64]) + + h_conv2_2_h = tf.nn.relu(conv2d(conv1_h, W_conv2_2_h) + b_conv2_2_h) + h_pool2_2_h = 
max_pool(h_conv2_2_h, 1, 2) + + W_conv3_2_h = weight_variable([1, 13, 66, 64]) + b_conv3_2_h = bias_variable([64]) + + h_conv3_2_h = tf.nn.relu(conv2d(conv1_h, W_conv3_2_h) + b_conv3_2_h) + h_pool3_2_h = max_pool(h_conv3_2_h, 1, 2) + + conv2_h = tf.concat([h_pool4_2_h, h_pool1_2_h], 3) + conv2_h = tf.concat([conv2_h, h_pool2_2_h], 3) + conv2_h = tf.concat([conv2_h, h_pool3_2_h], 3) + conv2_h = tf.concat([conv2_h, mask_pool4_2], 3) + + # conv3_h + W_conv4_3_h = weight_variable([1, 3, 258, 128]) + b_conv4_3_h = bias_variable([128]) + + h_conv4_3_h = tf.nn.relu(conv2d(conv2_h, W_conv4_3_h) + b_conv4_3_h) + h_pool4_3_h = max_pool(h_conv4_3_h, 1, 2) + + h_pool_flat4_h = tf.reshape(h_pool4_3_h, [-1, 1*8*128]) + + W_conv1_3_h = weight_variable([1, 5, 258, 128]) + b_conv1_3_h = bias_variable([128]) + + h_conv1_3_h = tf.nn.relu(conv2d(conv2_h, W_conv1_3_h) + b_conv1_3_h) + h_pool1_3_h = max_pool(h_conv1_3_h, 1, 2) + + h_pool_flat1_h = tf.reshape(h_pool1_3_h, [-1, 1*8*128]) + + W_conv2_3_h = weight_variable([1, 9, 258, 128]) + b_conv2_3_h = bias_variable([128]) + + h_conv2_3_h = tf.nn.relu(conv2d(conv2_h, W_conv2_3_h) + b_conv2_3_h) + h_pool2_3_h = max_pool(h_conv2_3_h, 1, 2) + + h_pool_flat2_h = tf.reshape(h_pool2_3_h, [-1, 1*8*128]) + + W_conv3_3_h = weight_variable([1, 13, 258, 128]) + b_conv3_3_h = bias_variable([128]) + + h_conv3_3_h = tf.nn.relu(conv2d(conv2_h, W_conv3_3_h) + b_conv3_3_h) + h_pool3_3_h = max_pool(h_conv3_3_h, 1, 2) + + h_pool_flat3_h = tf.reshape(h_pool3_3_h, [-1, 1*8*128]) + + + # conv1_ap + W_conv4_1_a = weight_variable([1, 3, 3, 16]) + b_conv4_1_a = bias_variable([16]) + + h_concat4_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv4_1_a = tf.nn.relu(conv2d(h_concat4_1_a, W_conv4_1_a) + b_conv4_1_a) + h_pool4_1_a = max_pool(h_conv4_1_a, 1, 2) + + W_conv1_1_a = weight_variable([1, 5, 3, 16]) + b_conv1_1_a = bias_variable([16]) + + h_concat1_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv1_1_a = tf.nn.relu(conv2d(h_concat1_1_a, W_conv1_1_a) + b_conv1_1_a) + h_pool1_1_a = max_pool(h_conv1_1_a, 1, 2) + + W_conv2_1_a = weight_variable([1, 9, 3, 16]) + b_conv2_1_a = bias_variable([16]) + + h_concat2_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv2_1_a = tf.nn.relu(conv2d(h_concat2_1_a, W_conv2_1_a) + b_conv2_1_a) + h_pool2_1_a = max_pool(h_conv2_1_a, 1, 2) + + W_conv3_1_a = weight_variable([1, 13, 3, 16]) + b_conv3_1_a = bias_variable([16]) + + h_concat3_1_a = tf.concat([batch_X_a, batch_mask_2], 3) + h_conv3_1_a = tf.nn.relu(conv2d(h_concat3_1_a, W_conv3_1_a) + b_conv3_1_a) + h_pool3_1_a = max_pool(h_conv3_1_a, 1, 2) + + mask_pool3_1_a = max_pool(batch_mask_2, 1, 2) + + conv1_ap = tf.concat([h_pool4_1_a, h_pool1_1_a], 3) + conv1_ap = tf.concat([conv1_ap, h_pool2_1_a], 3) + conv1_ap = tf.concat([conv1_ap, h_pool3_1_a], 3) + conv1_ap = tf.concat([conv1_ap, mask_pool3_1_a], 3) + + # conv2_ap + W_conv4_2_a = weight_variable([1, 3, 66, 64]) + b_conv4_2_a = bias_variable([64]) + + h_conv4_2_a = tf.nn.relu(conv2d(conv1_ap, W_conv4_2_a) + b_conv4_2_a) + h_pool4_2_a = max_pool(h_conv4_2_a, 1, 2) + + W_conv1_2_a = weight_variable([1, 5, 66, 64]) + b_conv1_2_a = bias_variable([64]) + + h_conv1_2_a = tf.nn.relu(conv2d(conv1_ap, W_conv1_2_a) + b_conv1_2_a) + h_pool1_2_a = max_pool(h_conv1_2_a, 1, 2) + + W_conv2_2_a = weight_variable([1, 9, 66, 64]) + b_conv2_2_a = bias_variable([64]) + + h_conv2_2_a = tf.nn.relu(conv2d(conv1_ap, W_conv2_2_a) + b_conv2_2_a) + h_pool2_2_a = max_pool(h_conv2_2_a, 1, 2) + + W_conv3_2_a = weight_variable([1, 13, 66, 64]) + b_conv3_2_a = 
bias_variable([64]) + + h_conv3_2_a = tf.nn.relu(conv2d(conv1_ap, W_conv3_2_a) + b_conv3_2_a) + h_pool3_2_a = max_pool(h_conv3_2_a, 1, 2) + + mask_pool3_2_a = max_pool(mask_pool3_1_a, 1, 2) + + conv2_ap = tf.concat([h_pool4_2_a, h_pool1_2_a], 3) + conv2_ap = tf.concat([conv2_ap, h_pool2_2_a], 3) + conv2_ap = tf.concat([conv2_ap, h_pool3_2_a], 3) + conv2_ap = tf.concat([conv2_ap, mask_pool3_2_a], 3) + + # conv3_ap + W_conv4_3_a = weight_variable([1, 3, 258, 128]) + b_conv4_3_a = bias_variable([128]) + + h_conv4_3_a = tf.nn.relu(conv2d(conv2_ap, W_conv4_3_a) + b_conv4_3_a) + h_pool4_3_a = max_pool(h_conv4_3_a, 1, 2) + h_pool4_3_a = tf.reduce_mean(h_pool4_3_a, axis=1) + + h_pool_flat4_a = tf.reshape(h_pool4_3_a, [-1, 1*8*128]) + + W_conv1_3_a = weight_variable([1, 5, 258, 128]) + b_conv1_3_a = bias_variable([128]) + + h_conv1_3_a = tf.nn.relu(conv2d(conv2_ap, W_conv1_3_a) + b_conv1_3_a) + h_pool1_3_a = max_pool(h_conv1_3_a, 1, 2) + h_pool1_3_a = tf.reduce_mean(h_pool1_3_a, axis=1) + + h_pool_flat1_a = tf.reshape(h_pool1_3_a, [-1, 1*8*128]) + + W_conv2_3_a = weight_variable([1, 9, 258, 128]) + b_conv2_3_a = bias_variable([128]) + + h_conv2_3_a = tf.nn.relu(conv2d(conv2_ap, W_conv2_3_a) + b_conv2_3_a) + h_pool2_3_a = max_pool(h_conv2_3_a, 1, 2) + h_pool2_3_a = tf.reduce_mean(h_pool2_3_a, axis=1) + + h_pool_flat2_a = tf.reshape(h_pool2_3_a, [-1, 1*8*128]) + + W_conv3_3_a = weight_variable([1, 13, 258, 128]) + b_conv3_3_a = bias_variable([128]) + + h_conv3_3_a = tf.nn.relu(conv2d(conv2_ap, W_conv3_3_a) + b_conv3_3_a) + h_pool3_3_a = max_pool(h_conv3_3_a, 1, 2) + h_pool3_3_a = tf.reduce_mean(h_pool3_3_a, axis=1) + + h_pool_flat3_a = tf.reshape(h_pool3_3_a, [-1, 1*8*128]) + + + # fc + h_pool_flat = tf.concat([h_pool_flat1_x, h_pool_flat1_y, h_pool_flat1_w, h_pool_flat1_h, h_pool_flat1_a, + h_pool_flat2_x, h_pool_flat2_y, h_pool_flat2_w, h_pool_flat2_h, h_pool_flat2_a, + h_pool_flat3_x, h_pool_flat3_y, h_pool_flat3_w, h_pool_flat3_h, h_pool_flat3_a, + h_pool_flat4_x, h_pool_flat4_y, h_pool_flat4_w, h_pool_flat4_h, h_pool_flat4_a], 1) + + W_fc1 = weight_variable([20 * 8 * 128, 2048]) + b_fc1 = bias_variable([2048]) + + h_fc1 = tf.nn.relu(tf.matmul(h_pool_flat, W_fc1) + b_fc1) + + h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) + + W_fc2 = weight_variable([2048, 1024]) + b_fc2 = bias_variable([1024]) + + h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) + + h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob) + + W_fc3 = weight_variable([1024, 2]) + b_fc3 = bias_variable([2]) + + y_conv = tf.matmul(h_fc2_drop, W_fc3) + b_fc3 + + return y_conv diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/__init__.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/align_dataset_mtcnn.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/align_dataset_mtcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..a86e165b9e3eb7972f1f3b23ba118cf41c4a28b9 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/align_dataset_mtcnn.py @@ -0,0 +1,186 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Performs face alignment and stores face thumbnails in the output directory.""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
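For orientation before the code: the script that follows walks an input directory of per-identity image folders, detects the largest and most central face in each image (or all faces when multiple-face detection is enabled) with the three-stage MTCNN cascade, and writes margin-padded, resized thumbnails to the output directory. A minimal sketch of driving it programmatically, assuming it is run from the src/ directory with facenet importable and with hypothetical dataset paths:

import align.align_dataset_mtcnn as aligner

# Equivalent to: python align/align_dataset_mtcnn.py <raw_dir> <aligned_dir> --image_size 182 --margin 44
args = aligner.parse_arguments([
    'datasets/faces_raw',          # input_dir (hypothetical)
    'datasets/faces_aligned_182',  # output_dir (hypothetical)
    '--image_size', '182',         # thumbnail size; 182 is the script default
    '--margin', '44',              # crop margin in pixels; 44 is the default
])
aligner.main(args)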
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from scipy import misc +import sys +import os +import argparse +import tensorflow as tf +import numpy as np +import facenet +import align.detect_face +import random +from time import sleep + +def main(args): + sleep(random.random()) + output_dir = os.path.expanduser(args.output_dir) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + # Store some git revision info in a text file in the log directory + src_path,_ = os.path.split(os.path.realpath(__file__)) + facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) + dataset = facenet.get_dataset(args.input_dir) + + print('Creating networks and loading parameters') + + with tf.Graph().as_default(): + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) + with sess.as_default(): + pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) + + minsize = 20 # minimum face size + threshold = [ 0.6, 0.7, 0.7 ] # detection thresholds for the three cascade stages + factor = 0.709 # scale factor + + # Add a random key to the filename to allow alignment using multiple processes + random_key = np.random.randint(0, high=99999) + bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) + + with open(bounding_boxes_filename, "w") as text_file: + nrof_images_total = 0 + nrof_successfully_aligned = 0 + if args.random_order: + random.shuffle(dataset) + for cls in dataset: + output_class_dir = os.path.join(output_dir, cls.name) + if not os.path.exists(output_class_dir): + os.makedirs(output_class_dir) + if args.random_order: + random.shuffle(cls.image_paths) + for image_path in cls.image_paths: + nrof_images_total += 1 + filename = os.path.splitext(os.path.split(image_path)[1])[0] + output_filename = os.path.join(output_class_dir, filename+'.png') + print(image_path) + if not os.path.exists(output_filename): + try: + img = misc.imread(image_path) + except (IOError, ValueError, IndexError) as e: + errorMessage = '{}: {}'.format(image_path, e) + print(errorMessage) + else: + if img.ndim<2: + print('Unable to align "%s"' % image_path) + text_file.write('%s\n' % (output_filename)) + continue + if img.ndim == 2: + img = facenet.to_rgb(img) + img = img[:,:,0:3] + + bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) + nrof_faces = bounding_boxes.shape[0] + if nrof_faces>0: + det = bounding_boxes[:,0:4] + det_arr = [] + img_size = np.asarray(img.shape)[0:2] + if nrof_faces>1: + if args.detect_multiple_faces: + for i in range(nrof_faces): + det_arr.append(np.squeeze(det[i])) + else: + bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) + img_center = img_size / 2 + offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) + offset_dist_squared = np.sum(np.power(offsets,2.0),0) + index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering + det_arr.append(det[index,:]) + else: + det_arr.append(np.squeeze(det)) + + for i, det in enumerate(det_arr): + det = np.squeeze(det) + bb = np.zeros(4, dtype=np.int32) + bb[0] = np.maximum(det[0]-args.margin/2, 0) + bb[1] = np.maximum(det[1]-args.margin/2, 0) + bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) + bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) + cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] +
scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') + nrof_successfully_aligned += 1 + filename_base, file_extension = os.path.splitext(output_filename) + if args.detect_multiple_faces: + output_filename_n = "{}_{}{}".format(filename_base, i, file_extension) + else: + output_filename_n = "{}{}".format(filename_base, file_extension) + misc.imsave(output_filename_n, scaled) + text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) + else: + print('Unable to align "%s"' % image_path) + text_file.write('%s\n' % (output_filename)) + + print('Total number of images: %d' % nrof_images_total) + print('Number of successfully aligned images: %d' % nrof_successfully_aligned) + + +def parse_arguments(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('input_dir', type=str, help='Directory with unaligned images.') + parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.') + parser.add_argument('--image_size', type=int, + help='Image size (height, width) in pixels.', default=182) + parser.add_argument('--margin', type=int, + help='Margin for the crop around the bounding box (height, width) in pixels.', default=44) + parser.add_argument('--random_order', + help='Shuffles the order of images to enable alignment using multiple processes.', action='store_true') + parser.add_argument('--gpu_memory_fraction', type=float, + help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0) + parser.add_argument('--detect_multiple_faces', action='store_true', + help='Detect and align multiple faces per image.') + return parser.parse_args(argv) + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det1.npy b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det1.npy new file mode 100644 index 0000000000000000000000000000000000000000..7c05a2c5625e0f4e8c9f633b5ddef5e942b03032 Binary files /dev/null and b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det1.npy differ diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det2.npy b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det2.npy new file mode 100644 index 0000000000000000000000000000000000000000..85d5bf09c9e42053f5587195f55b1c82abe81fde Binary files /dev/null and b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det2.npy differ diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det3.npy b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det3.npy new file mode 100644 index 0000000000000000000000000000000000000000..90d5ba975440fa18c2d020da6739e96ed5e04905 Binary files /dev/null and b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/det3.npy differ diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/detect_face.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/detect_face.py new file mode 100644 index 0000000000000000000000000000000000000000..9bd422e984d4c002a54814853ccf2a009c409f99 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/align/detect_face.py @@ -0,0 +1,808 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Tensorflow implementation of the face detection / alignment algorithm found at +https://github.com/kpzhang93/MTCNN_face_detection_alignment +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from six import string_types, iteritems + +import numpy as np +import tensorflow as tf +#from math import floor +import cv2 +import os + +def layer(op): + """Decorator for composable network layers.""" + + def layer_decorated(self, *args, **kwargs): + # Automatically set a name if not provided. + name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) + # Figure out the layer inputs. + if len(self.terminals) == 0: + raise RuntimeError('No input variables found for layer %s.' % name) + elif len(self.terminals) == 1: + layer_input = self.terminals[0] + else: + layer_input = list(self.terminals) + # Perform the operation and get the output. + layer_output = op(self, layer_input, *args, **kwargs) + # Add to layer LUT. + self.layers[name] = layer_output + # This output is now the input for the next layer. + self.feed(layer_output) + # Return self for chained calls. 
+ return self + + return layer_decorated + +class Network(object): + + def __init__(self, inputs, trainable=True): + # The input nodes for this network + self.inputs = inputs + # The current list of terminal nodes + self.terminals = [] + # Mapping from layer names to layers + self.layers = dict(inputs) + # If true, the resulting variables are set as trainable + self.trainable = trainable + + self.setup() + + def setup(self): + """Construct the network. """ + raise NotImplementedError('Must be implemented by the subclass.') + + def load(self, data_path, session, ignore_missing=False): + """Load network weights. + data_path: The path to the numpy-serialized network weights + session: The current TensorFlow session + ignore_missing: If true, serialized weights for missing layers are ignored. + """ + data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member + + for op_name in data_dict: + with tf.variable_scope(op_name, reuse=True): + for param_name, data in iteritems(data_dict[op_name]): + try: + var = tf.get_variable(param_name) + session.run(var.assign(data)) + except ValueError: + if not ignore_missing: + raise + + def feed(self, *args): + """Set the input(s) for the next operation by replacing the terminal nodes. + The arguments can be either layer names or the actual layers. + """ + assert len(args) != 0 + self.terminals = [] + for fed_layer in args: + if isinstance(fed_layer, string_types): + try: + fed_layer = self.layers[fed_layer] + except KeyError: + raise KeyError('Unknown layer name fed: %s' % fed_layer) + self.terminals.append(fed_layer) + return self + + def get_output(self): + """Returns the current network output.""" + return self.terminals[-1] + + def get_unique_name(self, prefix): + """Returns an index-suffixed unique name for the given prefix. + This is used for auto-generating layer names based on the type-prefix. + """ + ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 + return '%s_%d' % (prefix, ident) + + def make_var(self, name, shape): + """Creates a new TensorFlow variable.""" + return tf.get_variable(name, shape, trainable=self.trainable) + + def validate_padding(self, padding): + """Verifies that the padding is one of the supported ones.""" + assert padding in ('SAME', 'VALID') + + @layer + def conv(self, + inp, + k_h, + k_w, + c_o, + s_h, + s_w, + name, + relu=True, + padding='SAME', + group=1, + biased=True): + # Verify that the padding is acceptable + self.validate_padding(padding) + # Get the number of channels in the input + c_i = int(inp.get_shape()[-1]) + # Verify that the grouping parameter is valid + assert c_i % group == 0 + assert c_o % group == 0 + # Convolution for a given input and kernel + convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) + with tf.variable_scope(name) as scope: + kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) + # This is the common-case. Convolve the input without any further complications. 
+ output = convolve(inp, kernel) + # Add the biases + if biased: + biases = self.make_var('biases', [c_o]) + output = tf.nn.bias_add(output, biases) + if relu: + # ReLU non-linearity + output = tf.nn.relu(output, name=scope.name) + return output + + @layer + def prelu(self, inp, name): + with tf.variable_scope(name): + i = int(inp.get_shape()[-1]) + alpha = self.make_var('alpha', shape=(i,)) + output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) + return output + + @layer + def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): + self.validate_padding(padding) + return tf.nn.max_pool(inp, + ksize=[1, k_h, k_w, 1], + strides=[1, s_h, s_w, 1], + padding=padding, + name=name) + + @layer + def fc(self, inp, num_out, name, relu=True): + with tf.variable_scope(name): + input_shape = inp.get_shape() + if input_shape.ndims == 4: + # The input is spatial. Vectorize it first. + dim = 1 + for d in input_shape[1:].as_list(): + dim *= int(d) + feed_in = tf.reshape(inp, [-1, dim]) + else: + feed_in, dim = (inp, input_shape[-1].value) + weights = self.make_var('weights', shape=[dim, num_out]) + biases = self.make_var('biases', [num_out]) + op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b + fc = op(feed_in, weights, biases, name=name) + return fc + + + """ + Multi-dimensional softmax: + computes the softmax along the given axis of the target tensor, + since the native softmax only supports batch_size x dimension inputs. + Refer to https://github.com/tensorflow/tensorflow/issues/210. + """ + @layer + def softmax(self, target, axis, name=None): + max_axis = tf.reduce_max(target, axis, keepdims=True) + target_exp = tf.exp(target-max_axis) + normalize = tf.reduce_sum(target_exp, axis, keepdims=True) + softmax = tf.div(target_exp, normalize, name) + return softmax + +class PNet(Network): + def setup(self): + (self.feed('data') #pylint: disable=no-value-for-parameter, no-member + .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') + .prelu(name='PReLU1') + .max_pool(2, 2, 2, 2, name='pool1') + .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') + .prelu(name='PReLU2') + .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') + .prelu(name='PReLU3') + .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') + .softmax(3,name='prob1')) + + (self.feed('PReLU3') #pylint: disable=no-value-for-parameter + .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) + +class RNet(Network): + def setup(self): + (self.feed('data') #pylint: disable=no-value-for-parameter, no-member + .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') + .prelu(name='prelu1') + .max_pool(3, 3, 2, 2, name='pool1') + .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') + .prelu(name='prelu2') + .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') + .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') + .prelu(name='prelu3') + .fc(128, relu=False, name='conv4') + .prelu(name='prelu4') + .fc(2, relu=False, name='conv5-1') + .softmax(1,name='prob1')) + + (self.feed('prelu4') #pylint: disable=no-value-for-parameter + .fc(4, relu=False, name='conv5-2')) + +class ONet(Network): + def setup(self): + (self.feed('data') #pylint: disable=no-value-for-parameter, no-member + .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') + .prelu(name='prelu1') + .max_pool(3, 3, 2, 2, name='pool1') + .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') + .prelu(name='prelu2') + .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') + .conv(3, 3, 64, 1, 1, padding='VALID', relu=False,
name='conv3') + .prelu(name='prelu3') + .max_pool(2, 2, 2, 2, name='pool3') + .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') + .prelu(name='prelu4') + .fc(256, relu=False, name='conv5') + .prelu(name='prelu5') + .fc(2, relu=False, name='conv6-1') + .softmax(1, name='prob1')) + + (self.feed('prelu5') #pylint: disable=no-value-for-parameter + .fc(4, relu=False, name='conv6-2')) + + (self.feed('prelu5') #pylint: disable=no-value-for-parameter + .fc(10, relu=False, name='conv6-3')) + +def create_mtcnn(sess, model_path): + if not model_path: + model_path,_ = os.path.split(os.path.realpath(__file__)) + + with tf.variable_scope('pnet'): + data = tf.placeholder(tf.float32, (None,None,None,3), 'input') + pnet = PNet({'data':data}) + pnet.load(os.path.join(model_path, 'det1.npy'), sess) + with tf.variable_scope('rnet'): + data = tf.placeholder(tf.float32, (None,24,24,3), 'input') + rnet = RNet({'data':data}) + rnet.load(os.path.join(model_path, 'det2.npy'), sess) + with tf.variable_scope('onet'): + data = tf.placeholder(tf.float32, (None,48,48,3), 'input') + onet = ONet({'data':data}) + onet.load(os.path.join(model_path, 'det3.npy'), sess) + + pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) + rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) + onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) + return pnet_fun, rnet_fun, onet_fun + +def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): + """Detects faces in an image, and returns bounding boxes and points for them. + img: input image + minsize: minimum face size + pnet, rnet, onet: the three cascade networks (converted caffemodel weights) + threshold: [th1, th2, th3], the detection thresholds for the three cascade stages + factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
+ """ + factor_count=0 + total_boxes=np.empty((0,9)) + points=np.empty(0) + h=img.shape[0] + w=img.shape[1] + minl=np.amin([h, w]) + m=12.0/minsize + minl=minl*m + # create scale pyramid + scales=[] + while minl>=12: + scales += [m*np.power(factor, factor_count)] + minl = minl*factor + factor_count += 1 + + # first stage + for scale in scales: + hs=int(np.ceil(h*scale)) + ws=int(np.ceil(w*scale)) + im_data = imresample(img, (hs, ws)) + im_data = (im_data-127.5)*0.0078125 + img_x = np.expand_dims(im_data, 0) + img_y = np.transpose(img_x, (0,2,1,3)) + out = pnet(img_y) + out0 = np.transpose(out[0], (0,2,1,3)) + out1 = np.transpose(out[1], (0,2,1,3)) + + boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) + + # inter-scale nms + pick = nms(boxes.copy(), 0.5, 'Union') + if boxes.size>0 and pick.size>0: + boxes = boxes[pick,:] + total_boxes = np.append(total_boxes, boxes, axis=0) + + numbox = total_boxes.shape[0] + if numbox>0: + pick = nms(total_boxes.copy(), 0.7, 'Union') + total_boxes = total_boxes[pick,:] + regw = total_boxes[:,2]-total_boxes[:,0] + regh = total_boxes[:,3]-total_boxes[:,1] + qq1 = total_boxes[:,0]+total_boxes[:,5]*regw + qq2 = total_boxes[:,1]+total_boxes[:,6]*regh + qq3 = total_boxes[:,2]+total_boxes[:,7]*regw + qq4 = total_boxes[:,3]+total_boxes[:,8]*regh + total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) + total_boxes = rerec(total_boxes.copy()) + total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + + numbox = total_boxes.shape[0] + if numbox>0: + # second stage + tempimg = np.zeros((24,24,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (24, 24)) + else: + return np.empty(0) # degenerate crop; np.empty() without a shape raises TypeError + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = rnet(tempimg1) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + score = out1[1,:] + ipass = np.where(score>threshold[1]) + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + if total_boxes.shape[0]>0: + pick = nms(total_boxes, 0.7, 'Union') + total_boxes = total_boxes[pick,:] + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) + total_boxes = rerec(total_boxes.copy()) + + numbox = total_boxes.shape[0] + if numbox>0: + # third stage + total_boxes = np.fix(total_boxes).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + tempimg = np.zeros((48,48,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (48, 48)) + else: + return np.empty(0) # degenerate crop; np.empty() without a shape raises TypeError + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = onet(tempimg1) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + out2 = np.transpose(out[2]) + score = out2[1,:] + points = out1 + ipass = np.where(score>threshold[2]) + points = points[:,ipass[0]] + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + + w =
total_boxes[:,2]-total_boxes[:,0]+1 + h = total_boxes[:,3]-total_boxes[:,1]+1 + points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 + points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 + if total_boxes.shape[0]>0: + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) + pick = nms(total_boxes.copy(), 0.7, 'Min') + total_boxes = total_boxes[pick,:] + points = points[:,pick] + + return total_boxes, points + + +def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor): + """Detects faces in a list of images + images: list containing input images + detection_window_size_ratio: ratio of minimum face size to smallest image dimension + pnet, rnet, onet: the three cascade networks (converted caffemodel weights) + threshold: [th1, th2, th3], the detection thresholds for the three cascade stages, each in [0, 1] + factor: the factor used to create a scaling pyramid of face sizes to detect in the image. + """ + all_scales = [None] * len(images) + images_with_boxes = [None] * len(images) + + for i in range(len(images)): + images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} + + # create scale pyramid + for index, img in enumerate(images): + all_scales[index] = [] + h = img.shape[0] + w = img.shape[1] + minsize = int(detection_window_size_ratio * np.minimum(w, h)) + factor_count = 0 + minl = np.amin([h, w]) + if minsize <= 12: + minsize = 12 + + m = 12.0 / minsize + minl = minl * m + while minl >= 12: + all_scales[index].append(m * np.power(factor, factor_count)) + minl = minl * factor + factor_count += 1 + + # # # # # # # # # # # # # + # first stage - fast proposal network (pnet) to obtain face candidates + # # # # # # # # # # # # # + + images_obj_per_resolution = {} + + # TODO: round pyramid resolutions to a multiple of 8 to increase the probability that pyramid images will have the same resolution across input images + + for index, scales in enumerate(all_scales): + h = images[index].shape[0] + w = images[index].shape[1] + + for scale in scales: + hs = int(np.ceil(h * scale)) + ws = int(np.ceil(w * scale)) + + if (ws, hs) not in images_obj_per_resolution: + images_obj_per_resolution[(ws, hs)] = [] + + im_data = imresample(images[index], (hs, ws)) + im_data = (im_data - 127.5) * 0.0078125 + img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering + images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) + + for resolution in images_obj_per_resolution: + images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] + outs = pnet(images_per_resolution) + + for index in range(len(outs[0])): + scale = images_obj_per_resolution[resolution][index]['scale'] + image_index = images_obj_per_resolution[resolution][index]['index'] + out0 = np.transpose(outs[0][index], (1, 0, 2)) + out1 = np.transpose(outs[1][index], (1, 0, 2)) + + boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0]) + + # inter-scale nms + pick = nms(boxes.copy(), 0.5, 'Union') + if boxes.size > 0 and pick.size > 0: + boxes = boxes[pick, :] + images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'], + boxes, + axis=0) + + for index, image_obj in enumerate(images_with_boxes): + numbox = image_obj['total_boxes'].shape[0] + if numbox > 0: + h = images[index].shape[0] + w = images[index].shape[1] + pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union') + image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] + regw =
image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw + qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh + qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw + qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh + image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]])) + image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) + image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) + + numbox = image_obj['total_boxes'].shape[0] + tempimg = np.zeros((24, 24, 3, numbox)) + + if numbox > 0: + for k in range(0, numbox): + tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) + tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] + if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: + tempimg[:, :, :, k] = imresample(tmp, (24, 24)) + else: + return np.empty(0) # degenerate crop; np.empty() without a shape raises TypeError + + tempimg = (tempimg - 127.5) * 0.0078125 + image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) + + # # # # # # # # # # # # # + # second stage - refinement of face candidates with rnet + # # # # # # # # # # # # # + + bulk_rnet_input = np.empty((0, 24, 24, 3)) + for index, image_obj in enumerate(images_with_boxes): + if 'rnet_input' in image_obj: + bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0) + + out = rnet(bulk_rnet_input) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + score = out1[1, :] + + i = 0 + for index, image_obj in enumerate(images_with_boxes): + if 'rnet_input' not in image_obj: + continue + + rnet_input_count = image_obj['rnet_input'].shape[0] + score_per_image = score[i:i + rnet_input_count] + out0_per_image = out0[:, i:i + rnet_input_count] + + ipass = np.where(score_per_image > threshold[1]) + image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), + np.expand_dims(score_per_image[ipass].copy(), 1)]) + + mv = out0_per_image[:, ipass[0]] + + if image_obj['total_boxes'].shape[0] > 0: + h = images[index].shape[0] + w = images[index].shape[1] + pick = nms(image_obj['total_boxes'], 0.7, 'Union') + image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] + image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick])) + image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) + + numbox = image_obj['total_boxes'].shape[0] + + if numbox > 0: + tempimg = np.zeros((48, 48, 3, numbox)) + image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) + + for k in range(0, numbox): + tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) + tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] + if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: + tempimg[:, :, :, k] = imresample(tmp, (48, 48)) + else: + return np.empty(0) # degenerate crop; np.empty() without a shape raises TypeError + tempimg = (tempimg - 127.5) * 0.0078125 + image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) + + i += rnet_input_count + + # # # # # # # # # # # # # + # third stage - further refinement and facial landmarks positions with
onet + # # # # # # # # # # # # # + + bulk_onet_input = np.empty((0, 48, 48, 3)) + for index, image_obj in enumerate(images_with_boxes): + if 'onet_input' in image_obj: + bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0) + + out = onet(bulk_onet_input) + + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + out2 = np.transpose(out[2]) + score = out2[1, :] + points = out1 + + i = 0 + ret = [] + for index, image_obj in enumerate(images_with_boxes): + if 'onet_input' not in image_obj: + ret.append(None) + continue + + onet_input_count = image_obj['onet_input'].shape[0] + + out0_per_image = out0[:, i:i + onet_input_count] + score_per_image = score[i:i + onet_input_count] + points_per_image = points[:, i:i + onet_input_count] + + ipass = np.where(score_per_image > threshold[2]) + points_per_image = points_per_image[:, ipass[0]] + + image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), + np.expand_dims(score_per_image[ipass].copy(), 1)]) + mv = out0_per_image[:, ipass[0]] + + w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1 + h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1 + points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile( + image_obj['total_boxes'][:, 0], (5, 1)) - 1 + points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile( + image_obj['total_boxes'][:, 1], (5, 1)) - 1 + + if image_obj['total_boxes'].shape[0] > 0: + image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv)) + pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min') + image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] + points_per_image = points_per_image[:, pick] + + ret.append((image_obj['total_boxes'], points_per_image)) + else: + ret.append(None) + + i += onet_input_count + + return ret + + +# function [boundingbox] = bbreg(boundingbox,reg) +def bbreg(boundingbox,reg): + """Calibrate bounding boxes""" + if reg.shape[1]==1: + reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) + + w = boundingbox[:,2]-boundingbox[:,0]+1 + h = boundingbox[:,3]-boundingbox[:,1]+1 + b1 = boundingbox[:,0]+reg[:,0]*w + b2 = boundingbox[:,1]+reg[:,1]*h + b3 = boundingbox[:,2]+reg[:,2]*w + b4 = boundingbox[:,3]+reg[:,3]*h + boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) + return boundingbox + +def generateBoundingBox(imap, reg, scale, t): + """Use heatmap to generate bounding boxes""" + stride=2 + cellsize=12 + + imap = np.transpose(imap) + dx1 = np.transpose(reg[:,:,0]) + dy1 = np.transpose(reg[:,:,1]) + dx2 = np.transpose(reg[:,:,2]) + dy2 = np.transpose(reg[:,:,3]) + y, x = np.where(imap >= t) + if y.shape[0]==1: + dx1 = np.flipud(dx1) + dy1 = np.flipud(dy1) + dx2 = np.flipud(dx2) + dy2 = np.flipud(dy2) + score = imap[(y,x)] + reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) + if reg.size==0: + reg = np.empty((0,3)) + bb = np.transpose(np.vstack([y,x])) + q1 = np.fix((stride*bb+1)/scale) + q2 = np.fix((stride*bb+cellsize-1+1)/scale) + boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) + return boundingbox, reg + +# function pick = nms(boxes,threshold,type) +def nms(boxes, threshold, method): + if boxes.size==0: + return np.empty((0,3)) + x1 = boxes[:,0] + y1 = boxes[:,1] + x2 = boxes[:,2] + y2 = boxes[:,3] + s = boxes[:,4] + area = (x2-x1+1) * (y2-y1+1) + I = np.argsort(s) + pick = np.zeros_like(s, dtype=np.int16) + counter = 0 + while I.size>0: + i = I[-1] + pick[counter] = 
i + counter += 1 + idx = I[0:-1] + xx1 = np.maximum(x1[i], x1[idx]) + yy1 = np.maximum(y1[i], y1[idx]) + xx2 = np.minimum(x2[i], x2[idx]) + yy2 = np.minimum(y2[i], y2[idx]) + w = np.maximum(0.0, xx2-xx1+1) + h = np.maximum(0.0, yy2-yy1+1) + inter = w * h + if method == 'Min': + o = inter / np.minimum(area[i], area[idx]) + else: + o = inter / (area[i] + area[idx] - inter) + I = I[np.where(o<=threshold)] + pick = pick[0:counter] + return pick + +# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) +def pad(total_boxes, w, h): + """Compute the padding coordinates (pad the bounding boxes to square)""" + tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) + tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) + numbox = total_boxes.shape[0] + + dx = np.ones((numbox), dtype=np.int32) + dy = np.ones((numbox), dtype=np.int32) + edx = tmpw.copy().astype(np.int32) + edy = tmph.copy().astype(np.int32) + + x = total_boxes[:,0].copy().astype(np.int32) + y = total_boxes[:,1].copy().astype(np.int32) + ex = total_boxes[:,2].copy().astype(np.int32) + ey = total_boxes[:,3].copy().astype(np.int32) + + tmp = np.where(ex>w) + edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) + ex[tmp] = w + + tmp = np.where(ey>h) + edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) + ey[tmp] = h + + tmp = np.where(x<1) + dx.flat[tmp] = np.expand_dims(2-x[tmp],1) + x[tmp] = 1 + + tmp = np.where(y<1) + dy.flat[tmp] = np.expand_dims(2-y[tmp],1) + y[tmp] = 1 + + return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph + +# function [bboxA] = rerec(bboxA) +def rerec(bboxA): + """Convert bboxA to square.""" + h = bboxA[:,3]-bboxA[:,1] + w = bboxA[:,2]-bboxA[:,0] + l = np.maximum(w, h) + bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 + bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 + bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) + return bboxA + +def imresample(img, sz): + im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable + return im_data + + # This method is kept for debugging purposes +# h=img.shape[0] +# w=img.shape[1] +# hs, ws = sz +# dx = float(w) / ws +# dy = float(h) / hs +# im_data = np.zeros((hs,ws,3)) +# for a1 in range(0,hs): +# for a2 in range(0,ws): +# for a3 in range(0,3): +# im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] +# return im_data + diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/calculate_filtering_metrics.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/calculate_filtering_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..efd2f8790ce2570cdb0b07235436423f650630e8 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/calculate_filtering_metrics.py @@ -0,0 +1,155 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Calculate filtering metrics for a dataset and store in a .hdf file. +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
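The filtering metric computed by the script below is, for each image, the Euclidean distance between its embedding and the mean embedding (center) of its class; a per-class variance is recorded as well. A minimal numpy sketch of that computation, on hypothetical embeddings for a single class:

import numpy as np

emb_sort = np.random.randn(6, 128).astype(np.float32)  # stand-in embeddings for one class
center = np.mean(emb_sort, axis=0)                     # class center
diffs = emb_sort - center
dists_sqr = np.sum(np.square(diffs), axis=1)
class_variance = np.mean(dists_sqr)                    # per-class spread
distance_to_center = np.sqrt(dists_sqr)                # per-image filtering metric stored in the .hdf file
print(class_variance, distance_to_center)

Images with a large distance_to_center stand out from their class, which is what the stored metrics can then be used to filter on.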
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import numpy as np +import argparse +import facenet +import os +import sys +import time +import h5py +import math +from tensorflow.python.platform import gfile +from six import iteritems + +def main(args): + dataset = facenet.get_dataset(args.dataset_dir) + + with tf.Graph().as_default(): + + # Get a list of image paths and their labels + image_list, label_list = facenet.get_image_paths_and_labels(dataset) + nrof_images = len(image_list) + image_indices = range(nrof_images) + + image_batch, label_batch = facenet.read_and_augment_data(image_list, + image_indices, args.image_size, args.batch_size, None, + False, False, False, nrof_preprocess_threads=4, shuffle=False) + + model_exp = os.path.expanduser(args.model_file) + with gfile.FastGFile(model_exp,'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + input_map={'input':image_batch, 'phase_train':False} + tf.import_graph_def(graph_def, input_map=input_map, name='net') + + embeddings = tf.get_default_graph().get_tensor_by_name("net/embeddings:0") + + with tf.Session() as sess: + tf.train.start_queue_runners(sess=sess) + + embedding_size = int(embeddings.get_shape()[1]) + nrof_batches = int(math.ceil(nrof_images / args.batch_size)) + nrof_classes = len(dataset) + label_array = np.array(label_list) + class_names = [cls.name for cls in dataset] + nrof_examples_per_class = [ len(cls.image_paths) for cls in dataset ] + class_variance = np.zeros((nrof_classes,)) + class_center = np.zeros((nrof_classes,embedding_size)) + distance_to_center = np.ones((len(label_list),))*np.NaN + emb_array = np.zeros((0,embedding_size)) + idx_array = np.zeros((0,), dtype=np.int32) + lab_array = np.zeros((0,), dtype=np.int32) + index_arr = np.append(0, np.cumsum(nrof_examples_per_class)) + for i in range(nrof_batches): + t = time.time() + emb, idx = sess.run([embeddings, label_batch]) + emb_array = np.append(emb_array, emb, axis=0) + idx_array = np.append(idx_array, idx, axis=0) + lab_array = np.append(lab_array, label_array[idx], axis=0) + for cls in set(lab_array): + cls_idx = np.where(lab_array==cls)[0] + if cls_idx.shape[0]==nrof_examples_per_class[cls]: + # We have calculated all the embeddings for this class + i2 = np.argsort(idx_array[cls_idx]) + emb_class = emb_array[cls_idx,:] + emb_sort = emb_class[i2,:] + center = np.mean(emb_sort, axis=0) + diffs = emb_sort - center + dists_sqr = np.sum(np.square(diffs), axis=1) + class_variance[cls] = np.mean(dists_sqr) + class_center[cls,:] = center + distance_to_center[index_arr[cls]:index_arr[cls+1]] = np.sqrt(dists_sqr) + emb_array = np.delete(emb_array, cls_idx, axis=0) + idx_array = np.delete(idx_array, cls_idx, axis=0) + lab_array = np.delete(lab_array, cls_idx, axis=0) + + + print('Batch %d in %.3f seconds' % (i, time.time()-t)) + + print('Writing filtering data to %s' % args.data_file_name) + mdict = {'class_names':class_names, 'image_list':image_list, 'label_list':label_list, 'distance_to_center':distance_to_center } + with h5py.File(args.data_file_name, 'w') as f: + for key, value in iteritems(mdict): + f.create_dataset(key, data=value) + +def parse_arguments(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('dataset_dir', type=str, + help='Path to the directory containing aligned dataset.') + parser.add_argument('model_file', type=str, + help='File containing the frozen model in protobuf (.pb) format to use for feature 
extraction.') + parser.add_argument('data_file_name', type=str, + help='The name of the file to store filtering data in.') + parser.add_argument('--image_size', type=int, + help='Image size.', default=160) + parser.add_argument('--batch_size', type=int, + help='Number of images to process in a batch.', default=90) + return parser.parse_args(argv) + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/classifier.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..f20ae3b0a7e4de630137ea2bcd9b35dd0a4be941 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/classifier.py @@ -0,0 +1,197 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An example of how to use your own dataset to train a classifier that recognizes people. +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
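The classifier script below has two modes: TRAIN fits a linear SVM on precomputed embeddings and pickles it together with the class names, while CLASSIFY reloads the pickle and reports per-image probabilities and overall accuracy. A minimal sketch of the same logic on synthetic embeddings (the output path and class names are illustrative):

import pickle
import numpy as np
from sklearn.svm import SVC

emb_array = np.random.randn(20, 128)               # stand-in for embeddings from the forward pass
labels = np.repeat(np.arange(4), 5)                # 4 classes x 5 images (synthetic)
class_names = ['person_%d' % i for i in range(4)]

model = SVC(kernel='linear', probability=True)     # same estimator as in the script
model.fit(emb_array, labels)
with open('classifier.pkl', 'wb') as outfile:      # hypothetical classifier_filename
    pickle.dump((model, class_names), outfile)

with open('classifier.pkl', 'rb') as infile:       # CLASSIFY mode: reload and evaluate
    model, class_names = pickle.load(infile)
predictions = model.predict_proba(emb_array)
best_class_indices = np.argmax(predictions, axis=1)
print('Accuracy: %.3f' % np.mean(best_class_indices == labels))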
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+import facenet
+import os
+import sys
+import math
+import pickle
+from sklearn.svm import SVC
+
+def main(args):
+
+    with tf.Graph().as_default():
+
+        with tf.Session() as sess:
+
+            np.random.seed(seed=args.seed)
+
+            if args.use_split_dataset:
+                dataset_tmp = facenet.get_dataset(args.data_dir)
+                train_set, test_set = split_dataset(dataset_tmp, args.min_nrof_images_per_class, args.nrof_train_images_per_class)
+                if (args.mode=='TRAIN'):
+                    dataset = train_set
+                elif (args.mode=='CLASSIFY'):
+                    dataset = test_set
+            else:
+                dataset = facenet.get_dataset(args.data_dir)
+
+            # Check that there is at least one training image per class.
+            # Note: the condition and message must not be wrapped together in
+            # parentheses, or assert tests a (condition, message) tuple,
+            # which is always true.
+            for cls in dataset:
+                assert len(cls.image_paths) > 0, 'There must be at least one image for each class in the dataset'
+
+
+            paths, labels = facenet.get_image_paths_and_labels(dataset)
+
+            print('Number of classes: %d' % len(dataset))
+            print('Number of images: %d' % len(paths))
+
+            # Load the model
+            print('Loading feature extraction model')
+            facenet.load_model(args.model)
+
+            # Get input and output tensors
+            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
+            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
+            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
+            embedding_size = embeddings.get_shape()[1]
+
+            # Run forward pass to calculate embeddings
+            print('Calculating features for images')
+            nrof_images = len(paths)
+            nrof_batches_per_epoch = int(math.ceil(1.0*nrof_images / args.batch_size))
+            emb_array = np.zeros((nrof_images, embedding_size))
+            for i in range(nrof_batches_per_epoch):
+                start_index = i*args.batch_size
+                end_index = min((i+1)*args.batch_size, nrof_images)
+                paths_batch = paths[start_index:end_index]
+                images = facenet.load_data(paths_batch, False, False, args.image_size)
+                feed_dict = { images_placeholder:images, phase_train_placeholder:False }
+                emb_array[start_index:end_index,:] = sess.run(embeddings, feed_dict=feed_dict)
+
+            classifier_filename_exp = os.path.expanduser(args.classifier_filename)
+
+            if (args.mode=='TRAIN'):
+                # Train classifier
+                print('Training classifier')
+                model = SVC(kernel='linear', probability=True)
+                model.fit(emb_array, labels)
+
+                # Create a list of class names
+                class_names = [ cls.name.replace('_', ' ') for cls in dataset]
+
+                # Saving classifier model
+                with open(classifier_filename_exp, 'wb') as outfile:
+                    pickle.dump((model, class_names), outfile)
+                print('Saved classifier model to file "%s"' % classifier_filename_exp)
+
+            elif (args.mode=='CLASSIFY'):
+                # Classify images
+                print('Testing classifier')
+                with open(classifier_filename_exp, 'rb') as infile:
+                    (model, class_names) = pickle.load(infile)
+
+                print('Loaded classifier model from file "%s"' % classifier_filename_exp)
+
+                predictions = model.predict_proba(emb_array)
+                best_class_indices = np.argmax(predictions, axis=1)
+                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
+
+                for i in range(len(best_class_indices)):
+                    print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))
+
+                accuracy = np.mean(np.equal(best_class_indices, labels))
+                print('Accuracy: %.3f' % accuracy)
+
+
+def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class):
+    train_set = []
+    test_set = []
+    for cls in dataset:
+        paths =
cls.image_paths + # Remove classes with less than min_nrof_images_per_class + if len(paths)>=min_nrof_images_per_class: + np.random.shuffle(paths) + train_set.append(facenet.ImageClass(cls.name, paths[:nrof_train_images_per_class])) + test_set.append(facenet.ImageClass(cls.name, paths[nrof_train_images_per_class:])) + return train_set, test_set + + +def parse_arguments(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('mode', type=str, choices=['TRAIN', 'CLASSIFY'], + help='Indicates if a new classifier should be trained or a classification ' + + 'model should be used for classification', default='CLASSIFY') + parser.add_argument('data_dir', type=str, + help='Path to the data directory containing aligned LFW face patches.') + parser.add_argument('model', type=str, + help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') + parser.add_argument('classifier_filename', + help='Classifier model file name as a pickle (.pkl) file. ' + + 'For training this is the output and for classification this is an input.') + parser.add_argument('--use_split_dataset', + help='Indicates that the dataset specified by data_dir should be split into a training and test set. ' + + 'Otherwise a separate test set can be specified using the test_data_dir option.', action='store_true') + parser.add_argument('--test_data_dir', type=str, + help='Path to the test data directory containing aligned images used for testing.') + parser.add_argument('--batch_size', type=int, + help='Number of images to process in a batch.', default=90) + parser.add_argument('--image_size', type=int, + help='Image size (height, width) in pixels.', default=160) + parser.add_argument('--seed', type=int, + help='Random seed.', default=666) + parser.add_argument('--min_nrof_images_per_class', type=int, + help='Only include classes with at least this number of images in the dataset', default=20) + parser.add_argument('--nrof_train_images_per_class', type=int, + help='Use this number of images from each class for training and the rest for testing', default=10) + + return parser.parse_args(argv) + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/compare.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/compare.py new file mode 100644 index 0000000000000000000000000000000000000000..45e3217d23df6e155dfe83a6b68a11b37355566c --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/compare.py @@ -0,0 +1,157 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Performs face alignment and calculates L2 distance between the embeddings of images.""" + +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from scipy import misc +import tensorflow as tf +import numpy as np +import sys +import os +import copy +import argparse +import facenet +import align.detect_face + +def main(args): + + images = load_and_align_data(args.image_files, args.image_size, args.margin, args.gpu_memory_fraction) + with tf.Graph().as_default(): + + with tf.Session() as sess: + + # Load the model + facenet.load_model(args.model) + + # Get input and output tensors + images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") + embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") + phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") + + # Run forward pass to calculate embeddings + feed_dict = { images_placeholder: images, phase_train_placeholder:False } + emb = sess.run(embeddings, feed_dict=feed_dict) + + nrof_images = len(args.image_files) + + print('Images:') + for i in range(nrof_images): + print('%1d: %s' % (i, args.image_files[i])) + print('') + + # Print distance matrix + print('Distance matrix') + print(' ', end='') + for i in range(nrof_images): + print(' %1d ' % i, end='') + print('') + for i in range(nrof_images): + print('%1d ' % i, end='') + for j in range(nrof_images): + dist = np.sqrt(np.sum(np.square(np.subtract(emb[i,:], emb[j,:])))) + print(' %1.4f ' % dist, end='') + print('') + + +def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction): + + minsize = 20 # minimum size of face + threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold + factor = 0.709 # scale factor + + print('Creating networks and loading parameters') + with tf.Graph().as_default(): + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) + sess = 
tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) + with sess.as_default(): + pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) + + tmp_image_paths=copy.copy(image_paths) + img_list = [] + for image in tmp_image_paths: + img = misc.imread(os.path.expanduser(image), mode='RGB') + img_size = np.asarray(img.shape)[0:2] + bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) + if len(bounding_boxes) < 1: + image_paths.remove(image) + print("can't detect face, remove ", image) + continue + det = np.squeeze(bounding_boxes[0,0:4]) + bb = np.zeros(4, dtype=np.int32) + bb[0] = np.maximum(det[0]-margin/2, 0) + bb[1] = np.maximum(det[1]-margin/2, 0) + bb[2] = np.minimum(det[2]+margin/2, img_size[1]) + bb[3] = np.minimum(det[3]+margin/2, img_size[0]) + cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] + aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear') + prewhitened = facenet.prewhiten(aligned) + img_list.append(prewhitened) + images = np.stack(img_list) + return images + +def parse_arguments(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('model', type=str, + help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') + parser.add_argument('image_files', type=str, nargs='+', help='Images to compare') + parser.add_argument('--image_size', type=int, + help='Image size (height, width) in pixels.', default=160) + parser.add_argument('--margin', type=int, + help='Margin for the crop around the bounding box (height, width) in pixels.', default=44) + parser.add_argument('--gpu_memory_fraction', type=float, + help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0) + return parser.parse_args(argv) + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/decode_msceleb_dataset.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/decode_msceleb_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..258329b2831fca5fde6f5e464cecbde81070e6ea --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/decode_msceleb_dataset.py @@ -0,0 +1,114 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Decode the MsCelebV1 dataset in TSV (tab separated values) format downloaded from +https://www.microsoft.com/en-us/research/project/ms-celeb-1m-challenge-recognizing-one-million-celebrities-real-world/ +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from scipy import misc +import numpy as np +import base64 +import sys +import os +import cv2 +import argparse +import facenet + + +# File format: text files, each line is an image record containing 6 columns, delimited by TAB. +# Column1: Freebase MID +# Column2: Query/Name +# Column3: ImageSearchRank +# Column4: ImageURL +# Column5: PageURL +# Column6: ImageData_Base64Encoded + +def main(args): + output_dir = os.path.expanduser(args.output_dir) + + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + # Store some git revision info in a text file in the output directory + src_path,_ = os.path.split(os.path.realpath(__file__)) + facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) + + i = 0 + for f in args.tsv_files: + for line in f: + fields = line.split('\t') + class_dir = fields[0] + img_name = fields[1] + '-' + fields[4] + '.' 
+ args.output_format + img_string = fields[5] + img_dec_string = base64.b64decode(img_string) + img_data = np.fromstring(img_dec_string, dtype=np.uint8) + img = cv2.imdecode(img_data, cv2.IMREAD_COLOR) #pylint: disable=maybe-no-member + if args.size: + img = misc.imresize(img, (args.size, args.size), interp='bilinear') + full_class_dir = os.path.join(output_dir, class_dir) + if not os.path.exists(full_class_dir): + os.mkdir(full_class_dir) + full_path = os.path.join(full_class_dir, img_name.replace('/','_')) + cv2.imwrite(full_path, img) #pylint: disable=maybe-no-member + print('%8d: %s' % (i, full_path)) + i += 1 + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('output_dir', type=str, help='Output base directory for the image dataset') + parser.add_argument('tsv_files', type=argparse.FileType('r'), nargs='+', help='Input TSV file name(s)') + parser.add_argument('--size', type=int, help='Images are resized to the given size') + parser.add_argument('--output_format', type=str, help='Format of the output images', default='png', choices=['png', 'jpg']) + + main(parser.parse_args()) + diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/download_and_extract.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/download_and_extract.py new file mode 100644 index 0000000000000000000000000000000000000000..a3bf64be1ca6dbc0279221fab1a26ea5edc5a360 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/download_and_extract.py @@ -0,0 +1,78 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
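+# A minimal usage sketch, assuming this module is importable as
+# download_and_extract and that the target directory already exists
+# ('data' is a hypothetical example; the valid model names are the keys of
+# model_dict below):
+#
+#     from download_and_extract import download_and_extract_file
+#     download_and_extract_file('lfw-subset', 'data')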
+import requests +import zipfile +import os + +model_dict = { + 'lfw-subset': '1B5BQUZuJO-paxdN8UclxeHAR1WnR_Tzi', + '20170131-234652': '0B5MzpY9kBtDVSGM0RmVET2EwVEk', + '20170216-091149': '0B5MzpY9kBtDVTGZjcWkzT3pldDA', + '20170512-110547': '0B5MzpY9kBtDVZ2RpVDYwWmxoSUk', + '20180402-114759': '1EXPBSXwTaqrSC0OhUdXNmKSh9qJUQ55-' + } + +def download_and_extract_file(model_name, data_dir): + file_id = model_dict[model_name] + destination = os.path.join(data_dir, model_name + '.zip') + if not os.path.exists(destination): + print('Downloading file to %s' % destination) + download_file_from_google_drive(file_id, destination) + with zipfile.ZipFile(destination, 'r') as zip_ref: + print('Extracting file to %s' % data_dir) + zip_ref.extractall(data_dir) + +def download_file_from_google_drive(file_id, destination): + + URL = "https://drive.google.com/uc?export=download" + + session = requests.Session() + + response = session.get(URL, params = { 'id' : file_id }, stream = True) + token = get_confirm_token(response) + + if token: + params = { 'id' : file_id, 'confirm' : token } + response = session.get(URL, params = params, stream = True) + + save_response_content(response, destination) + +def get_confirm_token(response): + for key, value in response.cookies.items(): + if key.startswith('download_warning'): + return value + + return None + +def save_response_content(response, destination): + CHUNK_SIZE = 32768 + + with open(destination, "wb") as f: + for chunk in response.iter_content(CHUNK_SIZE): + if chunk: # filter out keep-alive new chunks + f.write(chunk) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/facenet.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/facenet.py new file mode 100644 index 0000000000000000000000000000000000000000..9f007edbd9a9d3bf655727e9bbd628d761f9c142 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/facenet.py @@ -0,0 +1,598 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Functions for building the face recognition network. 
+""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# pylint: disable=missing-docstring +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +from subprocess import Popen, PIPE +import tensorflow as tf +import numpy as np +from scipy import misc +from sklearn.model_selection import KFold +from scipy import interpolate +from tensorflow.python.training import training +import random +import re +from tensorflow.python.platform import gfile +import math +from six import iteritems + +def triplet_loss(anchor, positive, negative, alpha): + """Calculate the triplet loss according to the FaceNet paper + + Args: + anchor: the embeddings for the anchor images. + positive: the embeddings for the positive images. + negative: the embeddings for the negative images. + + Returns: + the triplet loss according to the FaceNet paper as a float tensor. 
+ """ + with tf.variable_scope('triplet_loss'): + pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) + neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) + + basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) + loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) + + return loss + +def center_loss(features, label, alfa, nrof_classes): + """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" + (http://ydwen.github.io/papers/WenECCV16.pdf) + """ + nrof_features = features.get_shape()[1] + centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, + initializer=tf.constant_initializer(0), trainable=False) + label = tf.reshape(label, [-1]) + centers_batch = tf.gather(centers, label) + diff = (1 - alfa) * (centers_batch - features) + centers = tf.scatter_sub(centers, label, diff) + with tf.control_dependencies([centers]): + loss = tf.reduce_mean(tf.square(features - centers_batch)) + return loss, centers + +def get_image_paths_and_labels(dataset): + image_paths_flat = [] + labels_flat = [] + for i in range(len(dataset)): + image_paths_flat += dataset[i].image_paths + labels_flat += [i] * len(dataset[i].image_paths) + return image_paths_flat, labels_flat + +def shuffle_examples(image_paths, labels): + shuffle_list = list(zip(image_paths, labels)) + random.shuffle(shuffle_list) + image_paths_shuff, labels_shuff = zip(*shuffle_list) + return image_paths_shuff, labels_shuff + +def random_rotate_image(image): + angle = np.random.uniform(low=-10.0, high=10.0) + return misc.imrotate(image, angle, 'bicubic') + +# 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip +RANDOM_ROTATE = 1 +RANDOM_CROP = 2 +RANDOM_FLIP = 4 +FIXED_STANDARDIZATION = 8 +FLIP = 16 +def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder): + images_and_labels_list = [] + for _ in range(nrof_preprocess_threads): + filenames, label, control = input_queue.dequeue() + images = [] + for filename in tf.unstack(filenames): + file_contents = tf.read_file(filename) + image = tf.image.decode_image(file_contents, 3) + image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), + lambda:tf.py_func(random_rotate_image, [image], tf.uint8), + lambda:tf.identity(image)) + image = tf.cond(get_control_flag(control[0], RANDOM_CROP), + lambda:tf.random_crop(image, image_size + (3,)), + lambda:tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1])) + image = tf.cond(get_control_flag(control[0], RANDOM_FLIP), + lambda:tf.image.random_flip_left_right(image), + lambda:tf.identity(image)) + image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION), + lambda:(tf.cast(image, tf.float32) - 127.5)/128.0, + lambda:tf.image.per_image_standardization(image)) + image = tf.cond(get_control_flag(control[0], FLIP), + lambda:tf.image.flip_left_right(image), + lambda:tf.identity(image)) + #pylint: disable=no-member + image.set_shape(image_size + (3,)) + images.append(image) + images_and_labels_list.append([images, label]) + + image_batch, label_batch = tf.train.batch_join( + images_and_labels_list, batch_size=batch_size_placeholder, + shapes=[image_size + (3,), ()], enqueue_many=True, + capacity=4 * nrof_preprocess_threads * 100, + allow_smaller_final_batch=True) + + return image_batch, label_batch + +def get_control_flag(control, field): + return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1) + +def _add_loss_summaries(total_loss): 
+ """Add summaries for losses. + + Generates moving average for all losses and associated summaries for + visualizing the performance of the network. + + Args: + total_loss: Total loss from loss(). + Returns: + loss_averages_op: op for generating moving averages of losses. + """ + # Compute the moving average of all individual losses and the total loss. + loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') + losses = tf.get_collection('losses') + loss_averages_op = loss_averages.apply(losses + [total_loss]) + + # Attach a scalar summmary to all individual losses and the total loss; do the + # same for the averaged version of the losses. + for l in losses + [total_loss]: + # Name each loss as '(raw)' and name the moving average version of the loss + # as the original loss name. + tf.summary.scalar(l.op.name +' (raw)', l) + tf.summary.scalar(l.op.name, loss_averages.average(l)) + + return loss_averages_op + +def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True): + # Generate moving averages of all losses and associated summaries. + loss_averages_op = _add_loss_summaries(total_loss) + + # Compute gradients. + with tf.control_dependencies([loss_averages_op]): + if optimizer=='ADAGRAD': + opt = tf.train.AdagradOptimizer(learning_rate) + elif optimizer=='ADADELTA': + opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) + elif optimizer=='ADAM': + opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) + elif optimizer=='RMSPROP': + opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) + elif optimizer=='MOM': + opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) + else: + raise ValueError('Invalid optimization algorithm') + + grads = opt.compute_gradients(total_loss, update_gradient_vars) + + # Apply gradients. + apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) + + # Add histograms for trainable variables. + if log_histograms: + for var in tf.trainable_variables(): + tf.summary.histogram(var.op.name, var) + + # Add histograms for gradients. + if log_histograms: + for grad, var in grads: + if grad is not None: + tf.summary.histogram(var.op.name + '/gradients', grad) + + # Track the moving averages of all trainable variables. 
+ variable_averages = tf.train.ExponentialMovingAverage( + moving_average_decay, global_step) + variables_averages_op = variable_averages.apply(tf.trainable_variables()) + + with tf.control_dependencies([apply_gradient_op, variables_averages_op]): + train_op = tf.no_op(name='train') + + return train_op + +def prewhiten(x): + mean = np.mean(x) + std = np.std(x) + std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) + y = np.multiply(np.subtract(x, mean), 1/std_adj) + return y + +def crop(image, random_crop, image_size): + if image.shape[1]>image_size: + sz1 = int(image.shape[1]//2) + sz2 = int(image_size//2) + if random_crop: + diff = sz1-sz2 + (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) + else: + (h, v) = (0,0) + image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] + return image + +def flip(image, random_flip): + if random_flip and np.random.choice([True, False]): + image = np.fliplr(image) + return image + +def to_rgb(img): + w, h = img.shape + ret = np.empty((w, h, 3), dtype=np.uint8) + ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img + return ret + +def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): + nrof_samples = len(image_paths) + images = np.zeros((nrof_samples, image_size, image_size, 3)) + for i in range(nrof_samples): + img = misc.imread(image_paths[i]) + if img.ndim == 2: + img = to_rgb(img) + if do_prewhiten: + img = prewhiten(img) + img = crop(img, do_random_crop, image_size) + img = flip(img, do_random_flip) + images[i,:,:,:] = img + return images + +def get_label_batch(label_data, batch_size, batch_index): + nrof_examples = np.size(label_data, 0) + j = batch_index*batch_size % nrof_examples + if j+batch_size<=nrof_examples: + batch = label_data[j:j+batch_size] + else: + x1 = label_data[j:nrof_examples] + x2 = label_data[0:nrof_examples-j] + batch = np.vstack([x1,x2]) + batch_int = batch.astype(np.int64) + return batch_int + +def get_batch(image_data, batch_size, batch_index): + nrof_examples = np.size(image_data, 0) + j = batch_index*batch_size % nrof_examples + if j+batch_size<=nrof_examples: + batch = image_data[j:j+batch_size,:,:,:] + else: + x1 = image_data[j:nrof_examples,:,:,:] + x2 = image_data[0:nrof_examples-j,:,:,:] + batch = np.vstack([x1,x2]) + batch_float = batch.astype(np.float32) + return batch_float + +def get_triplet_batch(triplets, batch_index, batch_size): + ax, px, nx = triplets + a = get_batch(ax, int(batch_size/3), batch_index) + p = get_batch(px, int(batch_size/3), batch_index) + n = get_batch(nx, int(batch_size/3), batch_index) + batch = np.vstack([a, p, n]) + return batch + +def get_learning_rate_from_file(filename, epoch): + with open(filename, 'r') as f: + for line in f.readlines(): + line = line.split('#', 1)[0] + if line: + par = line.strip().split(':') + e = int(par[0]) + if par[1]=='-': + lr = -1 + else: + lr = float(par[1]) + if e <= epoch: + learning_rate = lr + else: + return learning_rate + +class ImageClass(): + "Stores the paths to images for a given class" + def __init__(self, name, image_paths): + self.name = name + self.image_paths = image_paths + + def __str__(self): + return self.name + ', ' + str(len(self.image_paths)) + ' images' + + def __len__(self): + return len(self.image_paths) + +def get_dataset(path, has_class_directories=True): + dataset = [] + path_exp = os.path.expanduser(path) + classes = [path for path in os.listdir(path_exp) \ + if os.path.isdir(os.path.join(path_exp, path))] + classes.sort() + nrof_classes = len(classes) + for i in 
range(nrof_classes):
+        class_name = classes[i]
+        facedir = os.path.join(path_exp, class_name)
+        image_paths = get_image_paths(facedir)
+        dataset.append(ImageClass(class_name, image_paths))
+
+    return dataset
+
+def get_image_paths(facedir):
+    image_paths = []
+    if os.path.isdir(facedir):
+        images = os.listdir(facedir)
+        image_paths = [os.path.join(facedir,img) for img in images]
+    return image_paths
+
+def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode):
+    if mode=='SPLIT_CLASSES':
+        nrof_classes = len(dataset)
+        class_indices = np.arange(nrof_classes)
+        np.random.shuffle(class_indices)
+        split = int(round(nrof_classes*(1-split_ratio)))
+        train_set = [dataset[i] for i in class_indices[0:split]]
+        test_set = [dataset[i] for i in class_indices[split:]]  # ':' rather than ':-1' so the last class is not dropped
+    elif mode=='SPLIT_IMAGES':
+        train_set = []
+        test_set = []
+        for cls in dataset:
+            paths = cls.image_paths
+            np.random.shuffle(paths)
+            nrof_images_in_class = len(paths)
+            split = int(math.floor(nrof_images_in_class*(1-split_ratio)))
+            if split==nrof_images_in_class:
+                split = nrof_images_in_class-1
+            if split>=min_nrof_images_per_class and nrof_images_in_class-split>=1:
+                train_set.append(ImageClass(cls.name, paths[:split]))
+                test_set.append(ImageClass(cls.name, paths[split:]))
+    else:
+        raise ValueError('Invalid train/test split mode "%s"' % mode)
+    return train_set, test_set
+
+def load_model(model, input_map=None):
+    # Check if the model is a model directory (containing a metagraph and a checkpoint file)
+    # or if it is a protobuf file with a frozen graph
+    model_exp = os.path.expanduser(model)
+    if (os.path.isfile(model_exp)):
+        print('Model filename: %s' % model_exp)
+        with gfile.FastGFile(model_exp,'rb') as f:
+            graph_def = tf.GraphDef()
+            graph_def.ParseFromString(f.read())
+            tf.import_graph_def(graph_def, input_map=input_map, name='')
+    else:
+        print('Model directory: %s' % model_exp)
+        meta_file, ckpt_file = get_model_filenames(model_exp)
+
+        print('Metagraph file: %s' % meta_file)
+        print('Checkpoint file: %s' % ckpt_file)
+
+        saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map)
+        saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
+
+def get_model_filenames(model_dir):
+    files = os.listdir(model_dir)
+    meta_files = [s for s in files if s.endswith('.meta')]
+    if len(meta_files)==0:
+        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
+    elif len(meta_files)>1:
+        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
+    meta_file = meta_files[0]
+    ckpt = tf.train.get_checkpoint_state(model_dir)
+    if ckpt and ckpt.model_checkpoint_path:
+        ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
+        return meta_file, ckpt_file
+
+    meta_files = [s for s in files if '.ckpt' in s]
+    max_step = -1
+    for f in files:
+        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
+        if step_str is not None and len(step_str.groups())>=2:
+            step = int(step_str.groups()[1])
+            if step > max_step:
+                max_step = step
+                ckpt_file = step_str.groups()[0]
+    return meta_file, ckpt_file
+
+def distance(embeddings1, embeddings2, distance_metric=0):
+    if distance_metric==0:
+        # Euclidean distance
+        diff = np.subtract(embeddings1, embeddings2)
+        dist = np.sum(np.square(diff),1)
+    elif distance_metric==1:
+        # Distance based on cosine similarity
+        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1)
+        norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1)
+        similarity = dot / norm
+        dist = np.arccos(similarity) / math.pi
+    else:
+        # Raising a bare string is a TypeError in Python 3; wrap it in ValueError
+        raise ValueError('Undefined distance metric %d' % distance_metric)
+
+    return dist
+
+def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
+    assert(embeddings1.shape[0] == embeddings2.shape[0])
+    assert(embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+
+    tprs = np.zeros((nrof_folds,nrof_thresholds))
+    fprs = np.zeros((nrof_folds,nrof_thresholds))
+    accuracy = np.zeros((nrof_folds))
+
+    indices = np.arange(nrof_pairs)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+        if subtract_mean:
+            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
+        else:
+            mean = 0.0
+        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
+
+        # Find the best threshold for the fold
+        acc_train = np.zeros((nrof_thresholds))
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
+        best_threshold_index = np.argmax(acc_train)
+        for threshold_idx, threshold in enumerate(thresholds):
+            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
+        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
+
+    tpr = np.mean(tprs,0)
+    fpr = np.mean(fprs,0)
+    return tpr, fpr, accuracy
+
+def calculate_accuracy(threshold, dist, actual_issame):
+    predict_issame = np.less(dist, threshold)
+    tp = np.sum(np.logical_and(predict_issame, actual_issame))
+    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
+    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
+
+    tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn)
+    fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn)
+    acc = float(tp+tn)/dist.size
+    return tpr, fpr, acc
+
+
+def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False):
+    assert(embeddings1.shape[0] == embeddings2.shape[0])
+    assert(embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+
+    val = np.zeros(nrof_folds)
+    far = np.zeros(nrof_folds)
+
+    indices = np.arange(nrof_pairs)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+        if subtract_mean:
+            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
+        else:
+            mean = 0.0
+        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
+
+        # Find the threshold that gives FAR = far_target
+        far_train = np.zeros(nrof_thresholds)
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
+        if np.max(far_train)>=far_target:
+            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
+            threshold = f(far_target)
+        else:
+            threshold = 0.0
+
+        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
+
+    val_mean =
np.mean(val) + far_mean = np.mean(far) + val_std = np.std(val) + return val_mean, val_std, far_mean + + +def calculate_val_far(threshold, dist, actual_issame): + predict_issame = np.less(dist, threshold) + true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) + false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) + n_same = np.sum(actual_issame) + n_diff = np.sum(np.logical_not(actual_issame)) + val = float(true_accept) / float(n_same) + far = float(false_accept) / float(n_diff) + return val, far + +def store_revision_info(src_path, output_dir, arg_string): + try: + # Get git hash + cmd = ['git', 'rev-parse', 'HEAD'] + gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) + (stdout, _) = gitproc.communicate() + git_hash = stdout.strip() + except OSError as e: + git_hash = ' '.join(cmd) + ': ' + e.strerror + + try: + # Get local changes + cmd = ['git', 'diff', 'HEAD'] + gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) + (stdout, _) = gitproc.communicate() + git_diff = stdout.strip() + except OSError as e: + git_diff = ' '.join(cmd) + ': ' + e.strerror + + # Store a text file in the log directory + rev_info_filename = os.path.join(output_dir, 'revision_info.txt') + with open(rev_info_filename, "w") as text_file: + text_file.write('arguments: %s\n--------------------\n' % arg_string) + text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__) # @UndefinedVariable + text_file.write('git hash: %s\n--------------------\n' % git_hash) + text_file.write('%s' % git_diff) + +def list_variables(filename): + reader = training.NewCheckpointReader(filename) + variable_map = reader.get_variable_to_shape_map() + names = sorted(variable_map.keys()) + return names + +def put_images_on_grid(images, shape=(16,8)): + nrof_images = images.shape[0] + img_size = images.shape[1] + bw = 3 + img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32) + for i in range(shape[1]): + x_start = i*(img_size+bw)+bw + for j in range(shape[0]): + img_index = i*shape[0]+j + if img_index>=nrof_images: + break + y_start = j*(img_size+bw)+bw + img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :] + if img_index>=nrof_images: + break + return img + +def write_arguments_to_file(args, filename): + with open(filename, 'w') as f: + for key, value in iteritems(vars(args)): + f.write('%s: %s\n' % (key, str(value))) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/freeze_graph.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/freeze_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..346d52d53ae150af8f10b9076d5ec0b48e05e2bb --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/freeze_graph.py @@ -0,0 +1,130 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Imports a model metagraph and checkpoint file, converts the variables to constants +and exports the model as a graphdef protobuf +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
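+# A typical invocation, with hypothetical paths: the model directory must
+# hold the .meta/.ckpt pair located by facenet.get_model_filenames, and the
+# frozen graph (variables folded into constants) is written to the .pb file:
+#
+#     python src/freeze_graph.py ~/models/20180402-114759 ~/models/facenet_frozen.pb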
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import graph_util +import tensorflow as tf +import argparse +import os +import sys +import facenet +from six.moves import xrange # @UnresolvedImport + +def main(args): + with tf.Graph().as_default(): + with tf.Session() as sess: + # Load the model metagraph and checkpoint + print('Model directory: %s' % args.model_dir) + meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(args.model_dir)) + + print('Metagraph file: %s' % meta_file) + print('Checkpoint file: %s' % ckpt_file) + + model_dir_exp = os.path.expanduser(args.model_dir) + saver = tf.train.import_meta_graph(os.path.join(model_dir_exp, meta_file), clear_devices=True) + tf.get_default_session().run(tf.global_variables_initializer()) + tf.get_default_session().run(tf.local_variables_initializer()) + saver.restore(tf.get_default_session(), os.path.join(model_dir_exp, ckpt_file)) + + # Retrieve the protobuf graph definition and fix the batch norm nodes + input_graph_def = sess.graph.as_graph_def() + + # Freeze the graph def + output_graph_def = freeze_graph_def(sess, input_graph_def, 'embeddings,label_batch') + + # Serialize and dump the output graph to the filesystem + with tf.gfile.GFile(args.output_file, 'wb') as f: + f.write(output_graph_def.SerializeToString()) + print("%d ops in the final graph: %s" % (len(output_graph_def.node), args.output_file)) + +def freeze_graph_def(sess, input_graph_def, output_node_names): + for node in input_graph_def.node: + if node.op == 'RefSwitch': + node.op = 'Switch' + for index in xrange(len(node.input)): + if 'moving_' in node.input[index]: + node.input[index] = node.input[index] + '/read' + elif node.op == 'AssignSub': + node.op = 'Sub' + if 'use_locking' in node.attr: del node.attr['use_locking'] + elif node.op == 'AssignAdd': + node.op = 'Add' + if 'use_locking' in node.attr: del node.attr['use_locking'] + + # Get the list of important nodes + whitelist_names = [] + for node in input_graph_def.node: + if (node.name.startswith('InceptionResnet') or node.name.startswith('embeddings') or + node.name.startswith('image_batch') or node.name.startswith('label_batch') or + node.name.startswith('phase_train') or node.name.startswith('Logits')): + whitelist_names.append(node.name) + + # Replace all the variables in the graph with constants of the same values + output_graph_def = graph_util.convert_variables_to_constants( + sess, input_graph_def, output_node_names.split(","), + variable_names_whitelist=whitelist_names) + return output_graph_def + +def parse_arguments(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('model_dir', type=str, + help='Directory containing the metagraph (.meta) file and the checkpoint (ckpt) file containing model parameters') + parser.add_argument('output_file', type=str, + help='Filename for the exported graphdef protobuf (.pb)') + return parser.parse_args(argv) + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/.keep b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/calculate_attribute_vectors.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/calculate_attribute_vectors.py new file 
mode 100644 index 0000000000000000000000000000000000000000..540f24ec90e5861e0e0efa02c0ac5186f0ce78d5 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/calculate_attribute_vectors.py @@ -0,0 +1,227 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# MIT License +# +# Copyright (c) 2017 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
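+# The attribute vectors computed below reduce to a difference of mean latent
+# codes over positive (+1) and negative (-1) examples of each attribute; an
+# equivalent NumPy sketch (variable names are illustrative):
+#
+#     pos_avg = latent_vars[attributes[:, i] == 1].mean(axis=0)
+#     neg_avg = latent_vars[attributes[:, i] == -1].mean(axis=0)
+#     attribute_vectors[i] = pos_avg - neg_avg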
+ +"""Calculate average latent variables (here called attribute vectors) +for the different attributes in CelebA +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import sys +import argparse +import importlib +import facenet +import os +import numpy as np +import math +import time +import h5py +from six import iteritems + +def main(args): + + img_mean = np.array([134.10714722, 102.52040863, 87.15436554]) + img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016])) + + vae_checkpoint = os.path.expanduser(args.vae_checkpoint) + + fields, attribs_dict = read_annotations(args.annotations_filename) + + vae_def = importlib.import_module(args.vae_def) + vae = vae_def.Vae(args.latent_var_size) + gen_image_size = vae.get_image_size() + + with tf.Graph().as_default(): + tf.set_random_seed(args.seed) + + image_list = facenet.get_image_paths(os.path.expanduser(args.data_dir)) + + # Get attributes for images + nrof_attributes = len(fields) + attribs_list = [] + for img in image_list: + key = os.path.split(img)[1].split('.')[0] + attr = attribs_dict[key] + assert len(attr)==nrof_attributes + attribs_list.append(attr) + + # Create the input queue + index_list = range(len(image_list)) + input_queue = tf.train.slice_input_producer([image_list, attribs_list, index_list], num_epochs=1, shuffle=False) + + nrof_preprocess_threads = 4 + image_per_thread = [] + for _ in range(nrof_preprocess_threads): + filename = input_queue[0] + file_contents = tf.read_file(filename) + image = tf.image.decode_image(file_contents, channels=3) + image = tf.image.resize_image_with_crop_or_pad(image, 160, 160) + #image = tf.image.resize_images(image, (64,64)) + image.set_shape((args.image_size, args.image_size, 3)) + attrib = input_queue[1] + attrib.set_shape((nrof_attributes,)) + image = tf.cast(image, tf.float32) + image_per_thread.append([image, attrib, input_queue[2]]) + + images, attribs, indices = tf.train.batch_join( + image_per_thread, batch_size=args.batch_size, + shapes=[(args.image_size, args.image_size, 3), (nrof_attributes,), ()], enqueue_many=False, + capacity=4 * nrof_preprocess_threads * args.batch_size, + allow_smaller_final_batch=True) + + # Normalize + images_norm = (images-img_mean) / img_stddev + + # Resize to appropriate size for the encoder + images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size)) + + # Create encoder network + mean, log_variance = vae.encoder(images_norm_resize, True) + + epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size)) + std = tf.exp(log_variance/2) + latent_var = mean + epsilon * std + + # Create a saver + saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) + + # Start running operations on the Graph + gpu_memory_fraction = 1.0 + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) + sess.run(tf.global_variables_initializer()) + sess.run(tf.local_variables_initializer()) + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=sess) + + + with sess.as_default(): + + if vae_checkpoint: + print('Restoring VAE checkpoint: %s' % vae_checkpoint) + saver.restore(sess, vae_checkpoint) + + nrof_images = len(image_list) + nrof_batches = int(math.ceil(len(image_list) / args.batch_size)) + latent_vars = np.zeros((nrof_images, args.latent_var_size)) + attributes = 
np.zeros((nrof_images, nrof_attributes))
+            for i in range(nrof_batches):
+                start_time = time.time()
+                latent_var_, attribs_, indices_ = sess.run([latent_var, attribs, indices])
+                latent_vars[indices_,:] = latent_var_
+                attributes[indices_,:] = attribs_
+                duration = time.time() - start_time
+                print('Batch %d/%d: %.3f seconds' % (i+1, nrof_batches, duration))
+            # NOTE: This will print the 'Out of range' warning if the last batch is not full,
+            # as described by https://github.com/tensorflow/tensorflow/issues/8330
+
+            # Calculate average change in the latent variable when each attribute changes
+            attribute_vectors = np.zeros((nrof_attributes, args.latent_var_size), np.float32)
+            for i in range(nrof_attributes):
+                pos_idx = np.argwhere(attributes[:,i]==1)[:,0]
+                neg_idx = np.argwhere(attributes[:,i]==-1)[:,0]
+                pos_avg = np.mean(latent_vars[pos_idx,:], 0)
+                neg_avg = np.mean(latent_vars[neg_idx,:], 0)
+                attribute_vectors[i,:] = pos_avg - neg_avg
+
+            filename = os.path.expanduser(args.output_filename)
+            print('Writing attribute vectors, latent variables and attributes to %s' % filename)
+            mdict = {'latent_vars':latent_vars, 'attributes':attributes,
+                'fields':fields, 'attribute_vectors':attribute_vectors }
+            with h5py.File(filename, 'w') as f:
+                for key, value in iteritems(mdict):
+                    f.create_dataset(key, data=value)
+
+
+def read_annotations(filename):
+    attribs = {}
+    with open(filename, 'r') as f:
+        for i, line in enumerate(f.readlines()):
+            if i==0:
+                continue  # First line is the number of entries in the file
+            elif i==1:
+                fields = line.strip().split()  # Second line is the field names
+            else:
+                line = line.split()
+                img_name = line[0].split('.')[0]
+                # Materialize the map object so len() and indexing work on Python 3
+                img_attribs = list(map(int, line[1:]))
+                attribs[img_name] = img_attribs
+    return fields, attribs
+
+def parse_arguments(argv):
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('vae_def', type=str,
+        help='Model definition for the variational autoencoder.
Points to a module containing the definition.', + default='src.generative.models.dfc_vae') + parser.add_argument('vae_checkpoint', type=str, + help='Checkpoint file of a pre-trained variational autoencoder.') + parser.add_argument('data_dir', type=str, + help='Path to the directory containing aligned face patches for the CelebA dataset.') + parser.add_argument('annotations_filename', type=str, + help='Path to the annotations file', + default='/media/deep/datasets/CelebA/Anno/list_attr_celeba.txt') + parser.add_argument('output_filename', type=str, + help='Filename to use for the file containing the attribute vectors.') + parser.add_argument('--batch_size', type=int, + help='Number of images to process in a batch.', default=128) + parser.add_argument('--image_size', type=int, + help='Image size (height, width) in pixels.', default=64) + parser.add_argument('--latent_var_size', type=int, + help='Dimensionality of the latent variable.', default=100) + parser.add_argument('--seed', type=int, + help='Random seed.', default=666) + + return parser.parse_args(argv) + + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/.keep b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae.py new file mode 100644 index 0000000000000000000000000000000000000000..f762fdf2bf5c2f0a0aae36890a60d2a27f417364 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae.py @@ -0,0 +1,119 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
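+#
+# --- Hedged sketch (added for illustration; not part of the original FaceNet
+# sources): the encoder in this module returns two fully connected heads that
+# are interpreted as the mean and log-variance of a diagonal Gaussian
+# posterior. Callers draw a latent code with the reparameterization trick,
+# which keeps the sampling step differentiable w.r.t. both heads. A NumPy
+# reference of that step (left as a comment so the __future__ imports below
+# remain the first statements):
+#
+#     def sample_latent_np(mean, log_variance, seed=None):
+#         import numpy as np
+#         rng = np.random.RandomState(seed)
+#         eps = rng.standard_normal(mean.shape)
+#         return mean + eps * np.exp(log_variance / 2.0)
+#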
+# MIT License +# +# Copyright (c) 2017 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Variational autoencoder based on the paper +'Deep Feature Consistent Variational Autoencoder' +(https://arxiv.org/pdf/1610.00291.pdf) +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim +import generative.models.vae_base # @UnresolvedImport + + +class Vae(generative.models.vae_base.Vae): + + def __init__(self, latent_variable_dim): + super(Vae, self).__init__(latent_variable_dim, 64) + + def encoder(self, images, is_training): + activation_fn = leaky_relu # tf.nn.relu + weight_decay = 0.0 + with tf.variable_scope('encoder'): + with slim.arg_scope([slim.batch_norm], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=self.batch_norm_params): + net = slim.conv2d(images, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1') + net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2') + net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3') + net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4') + net = slim.flatten(net) + fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1') + fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2') + return fc1, fc2 + + def decoder(self, latent_var, is_training): + activation_fn = leaky_relu # tf.nn.relu + weight_decay = 0.0 + with tf.variable_scope('decoder'): + with slim.arg_scope([slim.batch_norm], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=self.batch_norm_params): + net = slim.fully_connected(latent_var, 4096, activation_fn=None, normalizer_fn=None, scope='Fc_1') + net = tf.reshape(net, [-1,4,4,256], name='Reshape') + + net = tf.image.resize_nearest_neighbor(net, size=(8,8), name='Upsample_1') + net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, 
scope='Conv2d_1') + + net = tf.image.resize_nearest_neighbor(net, size=(16,16), name='Upsample_2') + net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2') + + net = tf.image.resize_nearest_neighbor(net, size=(32,32), name='Upsample_3') + net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3') + + net = tf.image.resize_nearest_neighbor(net, size=(64,64), name='Upsample_4') + net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_4') + + return net + +def leaky_relu(x): + return tf.maximum(0.1*x,x) + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae_large.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae_large.py new file mode 100644 index 0000000000000000000000000000000000000000..a780ae6470247ed93cb494d8e6340fd79c71011f --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae_large.py @@ -0,0 +1,122 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# MIT License +# +# Copyright (c) 2017 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
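+
+# --- Hedged sketch (illustration only; not part of the original sources): the
+# decoder below upsamples by nearest-neighbor resizing followed by a 3x3
+# convolution rather than by a strided transposed convolution; resize-then-conv
+# is a common way to avoid checkerboard artifacts. One such stage, written in
+# the same slim style as the model code:
+#
+#     def upsample_stage(net, depth, size, scope):
+#         net = tf.image.resize_nearest_neighbor(net, size=size)
+#         return slim.conv2d(net, depth, [3, 3], 1, scope=scope)
+#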
+ +"""Variational autoencoder based on the paper +'Deep Feature Consistent Variational Autoencoder' +(https://arxiv.org/pdf/1610.00291.pdf) but with a larger image size (128x128 pixels) +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim +import generative.models.vae_base # @UnresolvedImport + + +class Vae(generative.models.vae_base.Vae): + + def __init__(self, latent_variable_dim): + super(Vae, self).__init__(latent_variable_dim, 128) + + + def encoder(self, images, is_training): + activation_fn = leaky_relu # tf.nn.relu + weight_decay = 0.0 + with tf.variable_scope('encoder'): + with slim.arg_scope([slim.batch_norm], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=self.batch_norm_params): + net = slim.conv2d(images, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1') + net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2') + net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3') + net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4') + net = slim.conv2d(net, 512, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_5') + net = slim.flatten(net) + fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1') + fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2') + return fc1, fc2 + + def decoder(self, latent_var, is_training): + activation_fn = leaky_relu # tf.nn.relu + weight_decay = 0.0 + with tf.variable_scope('decoder'): + with slim.arg_scope([slim.batch_norm], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=self.batch_norm_params): + net = slim.fully_connected(latent_var, 4096, activation_fn=None, normalizer_fn=None, scope='Fc_1') + net = tf.reshape(net, [-1,4,4,256], name='Reshape') + + net = tf.image.resize_nearest_neighbor(net, size=(8,8), name='Upsample_1') + net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1') + + net = tf.image.resize_nearest_neighbor(net, size=(16,16), name='Upsample_2') + net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2') + + net = tf.image.resize_nearest_neighbor(net, size=(32,32), name='Upsample_3') + net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3') + + net = tf.image.resize_nearest_neighbor(net, size=(64,64), name='Upsample_4') + net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4') + + net = tf.image.resize_nearest_neighbor(net, size=(128,128), name='Upsample_5') + net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_5') + return net + +def leaky_relu(x): + return tf.maximum(0.1*x,x) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae_resnet.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae_resnet.py new file mode 100644 index 
0000000000000000000000000000000000000000..f48dbde426dea3a72f330b4912078a4f6d38a49f --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/dfc_vae_resnet.py @@ -0,0 +1,137 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# MIT License +# +# Copyright (c) 2017 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
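+
+# --- Hedged sketch (illustration only): the conv2d_block helper defined at
+# the bottom of this file wraps each convolution in a scaled residual
+# connection, out = inp + scale * conv(inp). slim.repeat threads it three
+# times per resolution with scale = 0.1, damping the residual branch in the
+# same way Inception-ResNet stabilizes training. Generic form:
+#
+#     def scaled_residual(inp, branch_fn, scale=0.1):
+#         return inp + scale * branch_fn(inp)
+#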
+ +"""Variational autoencoder based on the paper +'Deep Feature Consistent Variational Autoencoder' +(https://arxiv.org/pdf/1610.00291.pdf) +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim +import generative.models.vae_base # @UnresolvedImport + + +class Vae(generative.models.vae_base.Vae): + + def __init__(self, latent_variable_dim): + super(Vae, self).__init__(latent_variable_dim, 64) + + def encoder(self, images, is_training): + activation_fn = leaky_relu # tf.nn.relu + weight_decay = 0.0 + with tf.variable_scope('encoder'): + with slim.arg_scope([slim.batch_norm], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=self.batch_norm_params): + net = images + + net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_1b') + + net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_2b') + + net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_3b') + + net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_4b') + + net = slim.flatten(net) + fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1') + fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2') + return fc1, fc2 + + def decoder(self, latent_var, is_training): + activation_fn = leaky_relu # tf.nn.relu + weight_decay = 0.0 + with tf.variable_scope('decoder'): + with slim.arg_scope([slim.batch_norm], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=self.batch_norm_params): + net = slim.fully_connected(latent_var, 4096, activation_fn=None, normalizer_fn=None, scope='Fc_1') + net = tf.reshape(net, [-1,4,4,256], name='Reshape') + + net = tf.image.resize_nearest_neighbor(net, size=(8,8), name='Upsample_1') + net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1b') + + net = tf.image.resize_nearest_neighbor(net, size=(16,16), name='Upsample_2') + net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2b') + + net = tf.image.resize_nearest_neighbor(net, size=(32,32), name='Upsample_3') + net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [3, 3], 1, activation_fn=activation_fn, 
scope='Conv2d_3b') + + net = tf.image.resize_nearest_neighbor(net, size=(64,64), name='Upsample_4') + net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4a') + net = slim.repeat(net, 3, conv2d_block, 0.1, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4b') + net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_4c') + + return net + +def conv2d_block(inp, scale, *args, **kwargs): + return inp + slim.conv2d(inp, *args, **kwargs) * scale + +def leaky_relu(x): + return tf.maximum(0.1*x,x) + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/vae_base.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/vae_base.py new file mode 100644 index 0000000000000000000000000000000000000000..cc34d2d31054706673a832d8f22a5753935aa921 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/models/vae_base.py @@ -0,0 +1,85 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# MIT License +# +# Copyright (c) 2017 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
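+
+# --- Hedged sketch (illustration only): concrete models subclass Vae and
+# override encoder()/decoder(); the contract assumed by the training and
+# attribute scripts is that encoder() returns a (mean, log_variance) pair and
+# decoder() maps a latent batch back to an image batch. Minimal skeleton:
+#
+#     class MyVae(Vae):
+#         def __init__(self, latent_variable_dim):
+#             super(MyVae, self).__init__(latent_variable_dim, 64)
+#         def encoder(self, images, is_training):
+#             return mean, log_variance  # two heads of size latent_variable_dim
+#         def decoder(self, latent_var, is_training):
+#             return reconstructed_images
+#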
+ +"""Base class for variational autoencoders containing an encoder and a decoder +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +class Vae(object): + + def __init__(self, latent_variable_dim, image_size): + self.latent_variable_dim = latent_variable_dim + self.image_size = image_size + self.batch_norm_params = { + # Decay for the moving averages. + 'decay': 0.995, + # epsilon to prevent 0s in variance. + 'epsilon': 0.001, + # force in-place updates of mean and variance estimates + 'updates_collections': None, + # Moving averages ends up in the trainable variables collection + 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], + } + + def encoder(self, images, is_training): + # Must be overridden in implementation classes + raise NotImplementedError + + def decoder(self, latent_var, is_training): + # Must be overridden in implementation classes + raise NotImplementedError + + def get_image_size(self): + return self.image_size + \ No newline at end of file diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/modify_attribute.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/modify_attribute.py new file mode 100644 index 0000000000000000000000000000000000000000..c3d2787005e69d38b23edb2c8d7c62db70edcc4b --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/modify_attribute.py @@ -0,0 +1,169 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# MIT License +# +# Copyright (c) 2017 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Modify attributes of images using attribute vectors calculated using +'calculate_attribute_vectors.py'. Images are generated from latent variables of +the CelebA dataset. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import sys +import argparse +import importlib +import facenet +import os +import numpy as np +import h5py +import math +from scipy import misc + +def main(args): + + img_mean = np.array([134.10714722, 102.52040863, 87.15436554]) + img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016])) + + vae_def = importlib.import_module(args.vae_def) + vae = vae_def.Vae(args.latent_var_size) + gen_image_size = vae.get_image_size() + + with tf.Graph().as_default(): + tf.set_random_seed(args.seed) + + images = tf.placeholder(tf.float32, shape=(None,gen_image_size,gen_image_size,3), name='input') + + # Normalize + images_norm = (images-img_mean) / img_stddev + + # Resize to appropriate size for the encoder + images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size)) + + # Create encoder network + mean, log_variance = vae.encoder(images_norm_resize, True) + + epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size)) + std = tf.exp(log_variance/2) + latent_var = mean + epsilon * std + + # Create decoder + reconstructed_norm = vae.decoder(latent_var, False) + + # Un-normalize + reconstructed = (reconstructed_norm*img_stddev) + img_mean + + # Create a saver + saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) + + # Start running operations on the Graph + gpu_memory_fraction = 1.0 + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) + sess.run(tf.global_variables_initializer()) + sess.run(tf.local_variables_initializer()) + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=sess) + + + with sess.as_default(): + + vae_checkpoint = os.path.expanduser(args.vae_checkpoint) + print('Restoring VAE checkpoint: %s' % vae_checkpoint) + saver.restore(sess, vae_checkpoint) + + filename = os.path.expanduser(args.attributes_filename) + with h5py.File(filename,'r') as f: + latent_vars = np.array(f.get('latent_vars')) + attributes = np.array(f.get('attributes')) + #fields = np.array(f.get('fields')) + attribute_vectors = np.array(f.get('attribute_vectors')) + + # Reconstruct faces while adding varying amount of the selected attribute vector + attribute_index = 31 # 31: 'Smiling' + image_indices = [8,11,13,18,19,26,31,39,47,54,56,57,58,59,60,73] + nrof_images = len(image_indices) + nrof_interp_steps = 10 + sweep_latent_var = np.zeros((nrof_interp_steps*nrof_images, args.latent_var_size), np.float32) + for j in range(nrof_images): + image_index = image_indices[j] + idx = np.argwhere(attributes[:,attribute_index]==-1)[image_index,0] + for i in range(nrof_interp_steps): + 
sweep_latent_var[i+nrof_interp_steps*j,:] = latent_vars[idx,:] + 5.0*i/nrof_interp_steps*attribute_vectors[attribute_index,:] + + recon = sess.run(reconstructed, feed_dict={latent_var:sweep_latent_var}) + + img = facenet.put_images_on_grid(recon, shape=(nrof_interp_steps*2,int(math.ceil(nrof_images/2)))) + + image_filename = os.path.expanduser(args.output_image_filename) + print('Writing generated image to %s' % image_filename) + misc.imsave(image_filename, img) + + +def parse_arguments(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('vae_def', type=str, + help='Model definition for the variational autoencoder. Points to a module containing the definition.') + parser.add_argument('vae_checkpoint', type=str, + help='Checkpoint file of a pre-trained variational autoencoder.') + parser.add_argument('attributes_filename', type=str, + help='The file containing the attribute vectors, as generated by calculate_attribute_vectors.py.') + parser.add_argument('output_image_filename', type=str, + help='File to write the generated image to.') + parser.add_argument('--latent_var_size', type=int, + help='Dimensionality of the latent variable.', default=100) + parser.add_argument('--seed', type=int, + help='Random seed.', default=666) + + return parser.parse_args(argv) + + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/train_vae.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/train_vae.py new file mode 100644 index 0000000000000000000000000000000000000000..de9e15bae11c1d8d76f86c9bd4dc6dc2e264a0a8 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/generative/train_vae.py @@ -0,0 +1,311 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
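+#
+# --- Hedged sketch (illustration only): the objective assembled below is
+# total_loss = alfa * KL + beta * reconstruction, where the KL term is the
+# closed-form divergence between the diagonal Gaussian posterior and a
+# standard normal prior. A NumPy reference of kl_divergence_loss():
+#
+#     def kl_divergence_np(mean, log_variance):
+#         import numpy as np
+#         return 0.5 * np.sum(np.exp(log_variance) + np.square(mean)
+#                             - 1.0 - log_variance, axis=1)
+#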
+# MIT License +# +# Copyright (c) 2017 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Train a Variational Autoencoder +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim +import sys +import time +import importlib +import argparse +import facenet +import numpy as np +import h5py +import os +from datetime import datetime +from scipy import misc +from six import iteritems + +def main(args): + + img_mean = np.array([134.10714722, 102.52040863, 87.15436554]) + img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016])) + + vae_def = importlib.import_module(args.vae_def) + vae = vae_def.Vae(args.latent_var_size) + gen_image_size = vae.get_image_size() + + subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') + model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) + if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist + os.makedirs(model_dir) + log_file_name = os.path.join(model_dir, 'logs.h5') + + # Write arguments to a text file + facenet.write_arguments_to_file(args, os.path.join(model_dir, 'arguments.txt')) + + # Store some git revision info in a text file in the log directory + src_path,_ = os.path.split(os.path.realpath(__file__)) + facenet.store_revision_info(src_path, model_dir, ' '.join(sys.argv)) + + with tf.Graph().as_default(): + tf.set_random_seed(args.seed) + global_step = tf.Variable(0, trainable=False) + + train_set = facenet.get_dataset(args.data_dir) + image_list, _ = facenet.get_image_paths_and_labels(train_set) + + # Create the input queue + input_queue = tf.train.string_input_producer(image_list, shuffle=True) + + nrof_preprocess_threads = 4 + image_per_thread = [] + for _ in range(nrof_preprocess_threads): + file_contents = tf.read_file(input_queue.dequeue()) + image = tf.image.decode_image(file_contents, channels=3) + image = tf.image.resize_image_with_crop_or_pad(image, args.input_image_size, args.input_image_size) + image.set_shape((args.input_image_size, args.input_image_size, 3)) + image = tf.cast(image, tf.float32) + #pylint: disable=no-member + image_per_thread.append([image]) + + images = tf.train.batch_join( + image_per_thread, batch_size=args.batch_size, + capacity=4 * nrof_preprocess_threads * args.batch_size, + allow_smaller_final_batch=False) + + # Normalize + images_norm = (images-img_mean) / img_stddev + + # Resize to 
appropriate size for the encoder + images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size)) + + # Create encoder network + mean, log_variance = vae.encoder(images_norm_resize, True) + + epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size)) + std = tf.exp(log_variance/2) + latent_var = mean + epsilon * std + + # Create decoder network + reconstructed_norm = vae.decoder(latent_var, True) + + # Un-normalize + reconstructed = (reconstructed_norm*img_stddev) + img_mean + + # Create reconstruction loss + if args.reconstruction_loss_type=='PLAIN': + images_resize = tf.image.resize_images(images, (gen_image_size,gen_image_size)) + reconstruction_loss = tf.reduce_mean(tf.reduce_sum(tf.pow(images_resize - reconstructed,2))) + elif args.reconstruction_loss_type=='PERCEPTUAL': + network = importlib.import_module(args.model_def) + + reconstructed_norm_resize = tf.image.resize_images(reconstructed_norm, (args.input_image_size,args.input_image_size)) + + # Stack images from both the input batch and the reconstructed batch in a new tensor + shp = [-1] + images_norm.get_shape().as_list()[1:] + input_images = tf.reshape(tf.stack([images_norm, reconstructed_norm_resize], axis=0), shp) + _, end_points = network.inference(input_images, 1.0, + phase_train=False, bottleneck_layer_size=128, weight_decay=0.0) + + # Get a list of feature names to use for loss terms + feature_names = args.loss_features.replace(' ', '').split(',') + + # Calculate L2 loss between original and reconstructed images in feature space + reconstruction_loss_list = [] + for feature_name in feature_names: + feature_flat = slim.flatten(end_points[feature_name]) + image_feature, reconstructed_feature = tf.unstack(tf.reshape(feature_flat, [2,args.batch_size,-1]), num=2, axis=0) + reconstruction_loss = tf.reduce_mean(tf.reduce_sum(tf.pow(image_feature-reconstructed_feature, 2)), name=feature_name+'_loss') + reconstruction_loss_list.append(reconstruction_loss) + # Sum up the losses in for the different features + reconstruction_loss = tf.add_n(reconstruction_loss_list, 'reconstruction_loss') + else: + pass + + # Create KL divergence loss + kl_loss = kl_divergence_loss(mean, log_variance) + kl_loss_mean = tf.reduce_mean(kl_loss) + + total_loss = args.alfa*kl_loss_mean + args.beta*reconstruction_loss + + learning_rate = tf.train.exponential_decay(args.initial_learning_rate, global_step, + args.learning_rate_decay_steps, args.learning_rate_decay_factor, staircase=True) + + # Calculate gradients and make sure not to include parameters for the perceptual loss model + opt = tf.train.AdamOptimizer(learning_rate) + grads = opt.compute_gradients(total_loss, var_list=get_variables_to_train()) + + # Apply gradients + apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) + with tf.control_dependencies([apply_gradient_op]): + train_op = tf.no_op(name='train') + + # Create a saver + saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) + + facenet_saver = tf.train.Saver(get_facenet_variables_to_restore()) + + # Start running operations on the Graph + gpu_memory_fraction = 1.0 + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) + sess.run(tf.global_variables_initializer()) + sess.run(tf.local_variables_initializer()) + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=sess) + + with sess.as_default(): + + if 
args.reconstruction_loss_type=='PERCEPTUAL': + if not args.pretrained_model: + raise ValueError('A pretrained model must be specified when using perceptual loss') + pretrained_model_exp = os.path.expanduser(args.pretrained_model) + print('Restoring pretrained model: %s' % pretrained_model_exp) + facenet_saver.restore(sess, pretrained_model_exp) + + log = { + 'total_loss': np.zeros((0,), np.float), + 'reconstruction_loss': np.zeros((0,), np.float), + 'kl_loss': np.zeros((0,), np.float), + 'learning_rate': np.zeros((0,), np.float), + } + + step = 0 + print('Running training') + while step < args.max_nrof_steps: + start_time = time.time() + step += 1 + save_state = step>0 and (step % args.save_every_n_steps==0 or step==args.max_nrof_steps) + if save_state: + _, reconstruction_loss_, kl_loss_mean_, total_loss_, learning_rate_, rec_ = sess.run( + [train_op, reconstruction_loss, kl_loss_mean, total_loss, learning_rate, reconstructed]) + img = facenet.put_images_on_grid(rec_, shape=(16,8)) + misc.imsave(os.path.join(model_dir, 'reconstructed_%06d.png' % step), img) + else: + _, reconstruction_loss_, kl_loss_mean_, total_loss_, learning_rate_ = sess.run( + [train_op, reconstruction_loss, kl_loss_mean, total_loss, learning_rate]) + log['total_loss'] = np.append(log['total_loss'], total_loss_) + log['reconstruction_loss'] = np.append(log['reconstruction_loss'], reconstruction_loss_) + log['kl_loss'] = np.append(log['kl_loss'], kl_loss_mean_) + log['learning_rate'] = np.append(log['learning_rate'], learning_rate_) + + duration = time.time() - start_time + print('Step: %d \tTime: %.3f \trec_loss: %.3f \tkl_loss: %.3f \ttotal_loss: %.3f' % (step, duration, reconstruction_loss_, kl_loss_mean_, total_loss_)) + + if save_state: + print('Saving checkpoint file') + checkpoint_path = os.path.join(model_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step, write_meta_graph=False) + print('Saving log') + with h5py.File(log_file_name, 'w') as f: + for key, value in iteritems(log): + f.create_dataset(key, data=value) + +def get_variables_to_train(): + train_variables = [] + for var in tf.trainable_variables(): + if 'Inception' not in var.name: + train_variables.append(var) + return train_variables + +def get_facenet_variables_to_restore(): + facenet_variables = [] + for var in tf.global_variables(): + if var.name.startswith('Inception'): + if 'Adam' not in var.name: + facenet_variables.append(var) + return facenet_variables + +def kl_divergence_loss(mean, log_variance): + kl = 0.5 * tf.reduce_sum( tf.exp(log_variance) + tf.square(mean) - 1.0 - log_variance, reduction_indices = 1) + return kl + +def parse_arguments(argv): + parser = argparse.ArgumentParser() + + parser.add_argument('vae_def', type=str, + help='Model definition for the variational autoencoder. Points to a module containing the definition.') + parser.add_argument('data_dir', type=str, + help='Path to the data directory containing aligned face patches.') + parser.add_argument('model_def', type=str, + help='Model definition. Points to a module containing the definition of the inference graph.') + parser.add_argument('pretrained_model', type=str, + help='Pretrained model to use to calculate features for perceptual loss.') + parser.add_argument('--models_base_dir', type=str, + help='Directory where to write trained models and checkpoints.', default='~/vae') + parser.add_argument('--loss_features', type=str, + help='Comma separated list of features to use for perceptual loss. 
Features should be defined ' + + 'in the end_points dictionary.', default='Conv2d_1a_3x3,Conv2d_2a_3x3, Conv2d_2b_3x3') + parser.add_argument('--reconstruction_loss_type', type=str, choices=['PLAIN', 'PERCEPTUAL'], + help='The type of reconstruction loss to use', default='PERCEPTUAL') + parser.add_argument('--max_nrof_steps', type=int, + help='Number of steps to run.', default=50000) + parser.add_argument('--save_every_n_steps', type=int, + help='Number of steps between storing of model checkpoint and log files', default=500) + parser.add_argument('--batch_size', type=int, + help='Number of images to process in a batch.', default=128) + parser.add_argument('--input_image_size', type=int, + help='Image size of input images (height, width) in pixels. If perceptual loss is used this ' + + 'should be the input image size for the perceptual loss model', default=160) + parser.add_argument('--latent_var_size', type=int, + help='Dimensionality of the latent variable.', default=100) + parser.add_argument('--initial_learning_rate', type=float, + help='Initial learning rate.', default=0.0005) + parser.add_argument('--learning_rate_decay_steps', type=int, + help='Number of steps between learning rate decay.', default=1) + parser.add_argument('--learning_rate_decay_factor', type=float, + help='Learning rate decay factor.', default=1.0) + parser.add_argument('--seed', type=int, + help='Random seed.', default=666) + parser.add_argument('--alfa', type=float, + help='Kullback-Leibler divergence loss factor.', default=1.0) + parser.add_argument('--beta', type=float, + help='Reconstruction loss factor.', default=0.5) + + return parser.parse_args(argv) + + +if __name__ == '__main__': + main(parse_arguments(sys.argv[1:])) diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/lfw.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/lfw.py new file mode 100644 index 0000000000000000000000000000000000000000..9f0f7f1bc98a2fb31a274d2ff1a3a3512f6c068b --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/lfw.py @@ -0,0 +1,113 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
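+
+# --- Hedged sketch (illustration only): the pairs file parsed below lists a
+# matched pair as "name idx1 idx2" (3 fields, same identity) and a mismatched
+# pair as "name1 idx1 name2 idx2" (4 fields). evaluate() assumes the embedding
+# array interleaves each pair, i.e. embeddings[0::2] holds the first image of
+# every pair and embeddings[1::2] the second, so per-pair distances reduce to:
+#
+#     def pair_distances_np(embeddings):
+#         import numpy as np
+#         diff = embeddings[0::2] - embeddings[1::2]
+#         return np.sum(np.square(diff), axis=1)  # squared Euclidean
+#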
+"""Helper for evaluation on the Labeled Faces in the Wild dataset +""" + +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import numpy as np +import facenet + +def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): + # Calculate evaluation metrics + thresholds = np.arange(0, 4, 0.01) + embeddings1 = embeddings[0::2] + embeddings2 = embeddings[1::2] + tpr, fpr, accuracy = facenet.calculate_roc(thresholds, embeddings1, embeddings2, + np.asarray(actual_issame), nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) + thresholds = np.arange(0, 4, 0.001) + val, val_std, far = facenet.calculate_val(thresholds, embeddings1, embeddings2, + np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) + return tpr, fpr, accuracy, val, val_std, far + +def get_paths(lfw_dir, pairs): + nrof_skipped_pairs = 0 + path_list = [] + issame_list = [] + for pair in pairs: + if len(pair) == 3: + path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) + path1 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2]))) + issame = True + elif len(pair) == 4: + path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) + path1 = add_extension(os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3]))) + issame = False + if os.path.exists(path0) and os.path.exists(path1): # Only add the pair if both paths exist + path_list += (path0,path1) + issame_list.append(issame) + else: + nrof_skipped_pairs += 1 + if nrof_skipped_pairs>0: + print('Skipped %d image pairs' % nrof_skipped_pairs) + + return path_list, issame_list + +def add_extension(path): + if os.path.exists(path+'.jpg'): + return path+'.jpg' + elif os.path.exists(path+'.png'): + return path+'.png' + else: + raise RuntimeError('No file "%s" with extension png or jpg.' 
% path) + +def read_pairs(pairs_filename): + pairs = [] + with open(pairs_filename, 'r') as f: + for line in f.readlines()[1:]: + pair = line.strip().split() + pairs.append(pair) + return np.array(pairs) + + + diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/.keep b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/__init__.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..efa625274e1821107bb00a5092a93b2f8932a847 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa + diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/dummy.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/dummy.py new file mode 100644 index 0000000000000000000000000000000000000000..f8db80baf55e279cfa4fa6a60171f6f67ac7ad60 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/dummy.py @@ -0,0 +1,81 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Dummy model used only for testing +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim +import numpy as np + +def inference(images, keep_probability, phase_train=True, # @UnusedVariable + bottleneck_layer_size=128, bottleneck_layer_activation=None, weight_decay=0.0, reuse=None): # @UnusedVariable + batch_norm_params = { + # Decay for the moving averages. + 'decay': 0.995, + # epsilon to prevent 0s in variance. + 'epsilon': 0.001, + # force in-place updates of mean and variance estimates + 'updates_collections': None, + # Moving averages ends up in the trainable variables collection + 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], + } + + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=tf.truncated_normal_initializer(stddev=0.1), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params): + size = np.prod(images.get_shape()[1:].as_list()) + net = slim.fully_connected(tf.reshape(images, (-1,size)), bottleneck_layer_size, activation_fn=None, + scope='Bottleneck', reuse=False) + return net, None diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/inception_resnet_v1.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/inception_resnet_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..02a99cdc16d17b8a8c44321ed0fdebafd7b7d8e6 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/inception_resnet_v1.py @@ -0,0 +1,273 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Contains the definition of the Inception Resnet V1 architecture. +As described in http://arxiv.org/abs/1602.07261. + Inception-v4, Inception-ResNet and the Impact of Residual Connections + on Learning + Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim + +# Inception-Resnet-A +def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 35x35 resnet block.""" + with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3') + mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + net += scale * up + if activation_fn: + net = activation_fn(net) + return net + +# Inception-Resnet-B +def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 17x17 resnet block.""" + with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 128, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 128, [1, 7], + scope='Conv2d_0b_1x7') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 128, [7, 1], + scope='Conv2d_0c_7x1') + mixed = tf.concat([tower_conv, tower_conv1_2], 3) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + net += scale * up + if activation_fn: + net = activation_fn(net) + return net + + +# Inception-Resnet-C +def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 8x8 resnet block.""" + with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 192, [1, 3], + scope='Conv2d_0b_1x3') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [3, 1], + scope='Conv2d_0c_3x1') + mixed = tf.concat([tower_conv, tower_conv1_2], 3) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + net += scale * up + if activation_fn: + net = 
activation_fn(net)
+    return net
+
+def reduction_a(net, k, l, m, n):
+    with tf.variable_scope('Branch_0'):
+        tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID',
+                                 scope='Conv2d_1a_3x3')
+    with tf.variable_scope('Branch_1'):
+        tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1')
+        tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3,
+                                    scope='Conv2d_0b_3x3')
+        tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3,
+                                    stride=2, padding='VALID',
+                                    scope='Conv2d_1a_3x3')
+    with tf.variable_scope('Branch_2'):
+        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
+                                     scope='MaxPool_1a_3x3')
+    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
+    return net
+
+def reduction_b(net):
+    with tf.variable_scope('Branch_0'):
+        tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
+        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
+                                   padding='VALID', scope='Conv2d_1a_3x3')
+    with tf.variable_scope('Branch_1'):
+        tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
+        tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2,
+                                    padding='VALID', scope='Conv2d_1a_3x3')
+    with tf.variable_scope('Branch_2'):
+        tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
+        tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3,
+                                    scope='Conv2d_0b_3x3')
+        tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2,
+                                    padding='VALID', scope='Conv2d_1a_3x3')
+    with tf.variable_scope('Branch_3'):
+        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
+                                     scope='MaxPool_1a_3x3')
+    net = tf.concat([tower_conv_1, tower_conv1_1,
+                     tower_conv2_2, tower_pool], 3)
+    return net
+
+def inference(images, keep_probability, phase_train=True,
+              bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
+    batch_norm_params = {
+        # Decay for the moving averages.
+        'decay': 0.995,
+        # epsilon to prevent 0s in variance.
+        'epsilon': 0.001,
+        # force in-place updates of mean and variance estimates
+        'updates_collections': None,
+        # Moving averages ends up in the trainable variables collection
+        'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
+    }
+
+    with slim.arg_scope([slim.conv2d, slim.fully_connected],
+                        weights_initializer=slim.initializers.xavier_initializer(),
+                        weights_regularizer=slim.l2_regularizer(weight_decay),
+                        normalizer_fn=slim.batch_norm,
+                        normalizer_params=batch_norm_params):
+        return inception_resnet_v1(images, is_training=phase_train,
+              dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)
+
+
+def inception_resnet_v1(inputs, is_training=True,
+                        dropout_keep_prob=0.8,
+                        bottleneck_layer_size=128,
+                        reuse=None,
+                        scope='InceptionResnetV1'):
+    """Creates the Inception Resnet V1 model.
+    Args:
+      inputs: a 4-D tensor of size [batch_size, height, width, 3].
+      is_training: whether the model is being trained.
+      dropout_keep_prob: float, the fraction to keep before final layer.
+      bottleneck_layer_size: the size of the bottleneck (embedding) layer.
+      reuse: whether or not the network and its variables should be reused. To be
+        able to reuse 'scope' must be given.
+      scope: Optional variable_scope.
+    Returns:
+      net: the output of the bottleneck layer.
+      end_points: the set of end_points from the inception model.
+ """ + end_points = {} + + with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + + # 149 x 149 x 32 + net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + end_points['Conv2d_1a_3x3'] = net + # 147 x 147 x 32 + net = slim.conv2d(net, 32, 3, padding='VALID', + scope='Conv2d_2a_3x3') + end_points['Conv2d_2a_3x3'] = net + # 147 x 147 x 64 + net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') + end_points['Conv2d_2b_3x3'] = net + # 73 x 73 x 64 + net = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_3a_3x3') + end_points['MaxPool_3a_3x3'] = net + # 73 x 73 x 80 + net = slim.conv2d(net, 80, 1, padding='VALID', + scope='Conv2d_3b_1x1') + end_points['Conv2d_3b_1x1'] = net + # 71 x 71 x 192 + net = slim.conv2d(net, 192, 3, padding='VALID', + scope='Conv2d_4a_3x3') + end_points['Conv2d_4a_3x3'] = net + # 35 x 35 x 256 + net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', + scope='Conv2d_4b_3x3') + end_points['Conv2d_4b_3x3'] = net + + # 5 x Inception-resnet-A + net = slim.repeat(net, 5, block35, scale=0.17) + end_points['Mixed_5a'] = net + + # Reduction-A + with tf.variable_scope('Mixed_6a'): + net = reduction_a(net, 192, 192, 256, 384) + end_points['Mixed_6a'] = net + + # 10 x Inception-Resnet-B + net = slim.repeat(net, 10, block17, scale=0.10) + end_points['Mixed_6b'] = net + + # Reduction-B + with tf.variable_scope('Mixed_7a'): + net = reduction_b(net) + end_points['Mixed_7a'] = net + + # 5 x Inception-Resnet-C + net = slim.repeat(net, 5, block8, scale=0.20) + end_points['Mixed_8a'] = net + + net = block8(net, activation_fn=None) + end_points['Mixed_8b'] = net + + with tf.variable_scope('Logits'): + end_points['PrePool'] = net + #pylint: disable=no-member + net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', + scope='AvgPool_1a_8x8') + net = slim.flatten(net) + + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='Dropout') + + end_points['PreLogitsFlatten'] = net + + net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, + scope='Bottleneck', reuse=False) + + return net, end_points diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/inception_resnet_v2.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/inception_resnet_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..4e58c75f2dff3ba3e78649a218d5b3e2b0f8363c --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/inception_resnet_v2.py @@ -0,0 +1,282 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Contains the definition of the Inception Resnet V2 architecture. +As described in http://arxiv.org/abs/1602.07261. + Inception-v4, Inception-ResNet and the Impact of Residual Connections + on Learning + Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim + +# Inception-Resnet-A +def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 35x35 resnet block.""" + with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') + mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + net += scale * up + if activation_fn: + net = activation_fn(net) + return net + +# Inception-Resnet-B +def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): + """Builds the 17x17 resnet block.""" + with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], + scope='Conv2d_0b_1x7') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], + scope='Conv2d_0c_7x1') + mixed = tf.concat([tower_conv, tower_conv1_2], 3) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, 
scope='Conv2d_1x1')
+        net += scale * up
+        if activation_fn:
+            net = activation_fn(net)
+    return net
+
+
+# Inception-Resnet-C
+def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
+    """Builds the 8x8 resnet block."""
+    with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
+        with tf.variable_scope('Branch_0'):
+            tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
+        with tf.variable_scope('Branch_1'):
+            tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
+            tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3],
+                                        scope='Conv2d_0b_1x3')
+            tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1],
+                                        scope='Conv2d_0c_3x1')
+        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
+        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
+                         activation_fn=None, scope='Conv2d_1x1')
+        net += scale * up
+        if activation_fn:
+            net = activation_fn(net)
+    return net
+
+def inference(images, keep_probability, phase_train=True,
+              bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
+    batch_norm_params = {
+        # Decay for the moving averages.
+        'decay': 0.995,
+        # epsilon to prevent 0s in variance.
+        'epsilon': 0.001,
+        # force in-place updates of mean and variance estimates
+        'updates_collections': None,
+        # Moving averages ends up in the trainable variables collection
+        'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
+    }
+    with slim.arg_scope([slim.conv2d, slim.fully_connected],
+                        weights_initializer=slim.initializers.xavier_initializer(),
+                        weights_regularizer=slim.l2_regularizer(weight_decay),
+                        normalizer_fn=slim.batch_norm,
+                        normalizer_params=batch_norm_params):
+        return inception_resnet_v2(images, is_training=phase_train,
+              dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)
+
+
+def inception_resnet_v2(inputs, is_training=True,
+                        dropout_keep_prob=0.8,
+                        bottleneck_layer_size=128,
+                        reuse=None,
+                        scope='InceptionResnetV2'):
+    """Creates the Inception Resnet V2 model.
+    Args:
+      inputs: a 4-D tensor of size [batch_size, height, width, 3].
+      is_training: whether the model is being trained.
+      dropout_keep_prob: float, the fraction to keep before final layer.
+      bottleneck_layer_size: the size of the bottleneck (embedding) layer.
+      reuse: whether or not the network and its variables should be reused. To be
+        able to reuse 'scope' must be given.
+      scope: Optional variable_scope.
+    Returns:
+      net: the output of the bottleneck layer.
+      end_points: the set of end_points from the inception model.
+ """ + end_points = {} + + with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse): + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + + # 149 x 149 x 32 + net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + end_points['Conv2d_1a_3x3'] = net + # 147 x 147 x 32 + net = slim.conv2d(net, 32, 3, padding='VALID', + scope='Conv2d_2a_3x3') + end_points['Conv2d_2a_3x3'] = net + # 147 x 147 x 64 + net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') + end_points['Conv2d_2b_3x3'] = net + # 73 x 73 x 64 + net = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_3a_3x3') + end_points['MaxPool_3a_3x3'] = net + # 73 x 73 x 80 + net = slim.conv2d(net, 80, 1, padding='VALID', + scope='Conv2d_3b_1x1') + end_points['Conv2d_3b_1x1'] = net + # 71 x 71 x 192 + net = slim.conv2d(net, 192, 3, padding='VALID', + scope='Conv2d_4a_3x3') + end_points['Conv2d_4a_3x3'] = net + # 35 x 35 x 192 + net = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_5a_3x3') + end_points['MaxPool_5a_3x3'] = net + + # 35 x 35 x 320 + with tf.variable_scope('Mixed_5b'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, + scope='Conv2d_0b_5x5') + with tf.variable_scope('Branch_2'): + tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, + scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', + scope='AvgPool_0a_3x3') + tower_pool_1 = slim.conv2d(tower_pool, 64, 1, + scope='Conv2d_0b_1x1') + net = tf.concat([tower_conv, tower_conv1_1, + tower_conv2_2, tower_pool_1], 3) + + end_points['Mixed_5b'] = net + net = slim.repeat(net, 10, block35, scale=0.17) + + # 17 x 17 x 1024 + with tf.variable_scope('Mixed_6a'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, + scope='Conv2d_0b_3x3') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, + stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) + + end_points['Mixed_6a'] = net + net = slim.repeat(net, 20, block17, scale=0.10) + + with tf.variable_scope('Mixed_7a'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, + scope='Conv2d_0b_3x3') + 
tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_3'): + tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat([tower_conv_1, tower_conv1_1, + tower_conv2_2, tower_pool], 3) + + end_points['Mixed_7a'] = net + + net = slim.repeat(net, 9, block8, scale=0.20) + net = block8(net, activation_fn=None) + + net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') + end_points['Conv2d_7b_1x1'] = net + + with tf.variable_scope('Logits'): + end_points['PrePool'] = net + #pylint: disable=no-member + net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', + scope='AvgPool_1a_8x8') + net = slim.flatten(net) + + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='Dropout') + + end_points['PreLogitsFlatten'] = net + + net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, + scope='Bottleneck', reuse=False) + + return net, end_points diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/squeezenet.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/squeezenet.py new file mode 100644 index 0000000000000000000000000000000000000000..2b395b23e81015916a12a1161e30b5c43efd6d72 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/models/squeezenet.py @@ -0,0 +1,94 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
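All backbones under src/models (dummy.py, inception_resnet_v1.py, inception_resnet_v2.py, and the squeezenet.py that follows) expose the same inference() entry point and return a (prelogits, end_points) pair, which is what lets the training scripts select an architecture by module path via --model_def. A minimal consumption sketch; the module path, input size, and hyperparameter values below are illustrative assumptions, not taken from the scripts in this diff:

import importlib
import tensorflow as tf

# Assumed module path; any module exposing the shared inference() signature works.
network = importlib.import_module('models.inception_resnet_v1')

images = tf.placeholder(tf.float32, shape=(None, 160, 160, 3), name='input')
phase_train = tf.placeholder(tf.bool, name='phase_train')

# Shared signature: inference(images, keep_probability, phase_train,
# bottleneck_layer_size, weight_decay, reuse) -> (prelogits, end_points)
prelogits, end_points = network.inference(images, keep_probability=0.8,
    phase_train=phase_train, bottleneck_layer_size=128, weight_decay=5e-4)

# L2-normalize to get unit-length embeddings, as the training scripts do.
embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')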
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow.contrib.slim as slim + +def fire_module(inputs, + squeeze_depth, + expand_depth, + reuse=None, + scope=None, + outputs_collections=None): + with tf.variable_scope(scope, 'fire', [inputs], reuse=reuse): + with slim.arg_scope([slim.conv2d, slim.max_pool2d], + outputs_collections=None): + net = squeeze(inputs, squeeze_depth) + outputs = expand(net, expand_depth) + return outputs + +def squeeze(inputs, num_outputs): + return slim.conv2d(inputs, num_outputs, [1, 1], stride=1, scope='squeeze') + +def expand(inputs, num_outputs): + with tf.variable_scope('expand'): + e1x1 = slim.conv2d(inputs, num_outputs, [1, 1], stride=1, scope='1x1') + e3x3 = slim.conv2d(inputs, num_outputs, [3, 3], scope='3x3') + return tf.concat([e1x1, e3x3], 3) + +def inference(images, keep_probability, phase_train=True, bottleneck_layer_size=128, weight_decay=0.0, reuse=None): + batch_norm_params = { + # Decay for the moving averages. + 'decay': 0.995, + # epsilon to prevent 0s in variance. + 'epsilon': 0.001, + # force in-place updates of mean and variance estimates + 'updates_collections': None, + # Moving averages ends up in the trainable variables collection + 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], + } + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_initializer=slim.xavier_initializer_conv2d(uniform=True), + weights_regularizer=slim.l2_regularizer(weight_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params): + with tf.variable_scope('squeezenet', [images], reuse=reuse): + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=phase_train): + net = slim.conv2d(images, 96, [7, 7], stride=2, scope='conv1') + net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool1') + net = fire_module(net, 16, 64, scope='fire2') + net = fire_module(net, 16, 64, scope='fire3') + net = fire_module(net, 32, 128, scope='fire4') + net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool4') + net = fire_module(net, 32, 128, scope='fire5') + net = fire_module(net, 48, 192, scope='fire6') + net = fire_module(net, 48, 192, scope='fire7') + net = fire_module(net, 64, 256, scope='fire8') + net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool8') + net = fire_module(net, 64, 256, scope='fire9') + net = slim.dropout(net, keep_probability) + net = slim.conv2d(net, 1000, [1, 1], activation_fn=None, normalizer_fn=None, scope='conv10') + net = slim.avg_pool2d(net, net.get_shape()[1:3], scope='avgpool10') + net = tf.squeeze(net, [1, 2], name='logits') + net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, + scope='Bottleneck', reuse=False) + return net, None diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/test.txt b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..067fdb43888d60f54e5966a7ba4db107fb045772 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/test.txt @@ -0,0 +1,2 @@ +1,2 +3,4 diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/train_softmax.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/train_softmax.py new file mode 100644 index 0000000000000000000000000000000000000000..c66754d8bc2426ec4c79ea31148560939aa4f7a2 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/train_softmax.py @@ -0,0 +1,607 @@ +# Copyright 
2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Training a face recognizer with TensorFlow using softmax cross entropy loss +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
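train_softmax.py combines softmax cross-entropy with whatever accumulates in the REGULARIZATION_LOSSES collection: L2 weight decay, plus the optional center-loss and prelogits-norm penalties scaled by their respective factors. A minimal sketch of that composition with illustrative names; the real graph is assembled inside main() below:

import tensorflow as tf

def total_loss_sketch(logits, label_batch):
    # Average cross-entropy over the batch.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=label_batch, logits=logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    # Center loss, prelogits-norm loss, and L2 weight decay all end up in
    # this collection before being summed into the total loss.
    regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    return tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss')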
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from datetime import datetime +import os.path +import time +import sys +import random +import tensorflow as tf +import numpy as np +import importlib +import argparse +import facenet +import lfw +import h5py +import math +import tensorflow.contrib.slim as slim +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops + +def main(args): + + network = importlib.import_module(args.model_def) + image_size = (args.image_size, args.image_size) + + subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') + log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) + if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist + os.makedirs(log_dir) + model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) + if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist + os.makedirs(model_dir) + + stat_file_name = os.path.join(log_dir, 'stat.h5') + + # Write arguments to a text file + facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) + + # Store some git revision info in a text file in the log directory + src_path,_ = os.path.split(os.path.realpath(__file__)) + facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) + + np.random.seed(seed=args.seed) + random.seed(args.seed) + dataset = facenet.get_dataset(args.data_dir) + if args.filter_filename: + dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), + args.filter_percentile, args.filter_min_nrof_images_per_class) + + if args.validation_set_split_ratio>0.0: + train_set, val_set = facenet.split_dataset(dataset, args.validation_set_split_ratio, args.min_nrof_val_images_per_class, 'SPLIT_IMAGES') + else: + train_set, val_set = dataset, [] + + nrof_classes = len(train_set) + + print('Model directory: %s' % model_dir) + print('Log directory: %s' % log_dir) + pretrained_model = None + if args.pretrained_model: + pretrained_model = os.path.expanduser(args.pretrained_model) + print('Pre-trained model: %s' % pretrained_model) + + if args.lfw_dir: + print('LFW directory: %s' % args.lfw_dir) + # Read the file containing the pairs used for testing + pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) + # Get the paths for the corresponding images + lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) + + with tf.Graph().as_default(): + tf.set_random_seed(args.seed) + global_step = tf.Variable(0, trainable=False) + + # Get a list of image paths and their labels + image_list, label_list = facenet.get_image_paths_and_labels(train_set) + assert len(image_list)>0, 'The training set should not be empty' + + val_image_list, val_label_list = facenet.get_image_paths_and_labels(val_set) + + # Create a queue that produces indices into the image_list and label_list + labels = ops.convert_to_tensor(label_list, dtype=tf.int32) + range_size = array_ops.shape(labels)[0] + index_queue = tf.train.range_input_producer(range_size, num_epochs=None, + shuffle=True, seed=None, capacity=32) + + index_dequeue_op = index_queue.dequeue_many(args.batch_size*args.epoch_size, 'index_dequeue') + + learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') + batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') + phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') + 
image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') + labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') + control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') + + nrof_preprocess_threads = 4 + input_queue = data_flow_ops.FIFOQueue(capacity=2000000, + dtypes=[tf.string, tf.int32, tf.int32], + shapes=[(1,), (1,), (1,)], + shared_name=None, name=None) + enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') + image_batch, label_batch = facenet.create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) + + image_batch = tf.identity(image_batch, 'image_batch') + image_batch = tf.identity(image_batch, 'input') + label_batch = tf.identity(label_batch, 'label_batch') + + print('Number of classes in training set: %d' % nrof_classes) + print('Number of examples in training set: %d' % len(image_list)) + + print('Number of classes in validation set: %d' % len(val_set)) + print('Number of examples in validation set: %d' % len(val_image_list)) + + print('Building training graph') + + # Build the inference graph + prelogits, _ = network.inference(image_batch, args.keep_probability, + phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, + weight_decay=args.weight_decay) + logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, + weights_initializer=slim.initializers.xavier_initializer(), + weights_regularizer=slim.l2_regularizer(args.weight_decay), + scope='Logits', reuse=False) + + embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') + + # Norm for the prelogits + eps = 1e-4 + prelogits_norm = tf.reduce_mean(tf.norm(tf.abs(prelogits)+eps, ord=args.prelogits_norm_p, axis=1)) + tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_norm * args.prelogits_norm_loss_factor) + + # Add center loss + prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes) + tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) + + learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, + args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) + tf.summary.scalar('learning_rate', learning_rate) + + # Calculate the average cross entropy loss across the batch + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=label_batch, logits=logits, name='cross_entropy_per_example') + cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') + tf.add_to_collection('losses', cross_entropy_mean) + + correct_prediction = tf.cast(tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) + accuracy = tf.reduce_mean(correct_prediction) + + # Calculate the total losses + regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') + + # Build a Graph that trains the model with one batch of examples and updates the model parameters + train_op = facenet.train(total_loss, global_step, args.optimizer, + learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) + + # Create a saver + saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) + + # Build the summary operation based on the TF collection of Summaries. 
+ summary_op = tf.summary.merge_all() + + # Start running operations on the Graph. + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) + sess.run(tf.global_variables_initializer()) + sess.run(tf.local_variables_initializer()) + summary_writer = tf.summary.FileWriter(log_dir, sess.graph) + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=sess) + + with sess.as_default(): + + if pretrained_model: + print('Restoring pretrained model: %s' % pretrained_model) + saver.restore(sess, pretrained_model) + + # Training and validation loop + print('Running training') + nrof_steps = args.max_nrof_epochs*args.epoch_size + nrof_val_samples = int(math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs)) # Validate every validate_every_n_epochs as well as in the last epoch + stat = { + 'loss': np.zeros((nrof_steps,), np.float32), + 'center_loss': np.zeros((nrof_steps,), np.float32), + 'reg_loss': np.zeros((nrof_steps,), np.float32), + 'xent_loss': np.zeros((nrof_steps,), np.float32), + 'prelogits_norm': np.zeros((nrof_steps,), np.float32), + 'accuracy': np.zeros((nrof_steps,), np.float32), + 'val_loss': np.zeros((nrof_val_samples,), np.float32), + 'val_xent_loss': np.zeros((nrof_val_samples,), np.float32), + 'val_accuracy': np.zeros((nrof_val_samples,), np.float32), + 'lfw_accuracy': np.zeros((args.max_nrof_epochs,), np.float32), + 'lfw_valrate': np.zeros((args.max_nrof_epochs,), np.float32), + 'learning_rate': np.zeros((args.max_nrof_epochs,), np.float32), + 'time_train': np.zeros((args.max_nrof_epochs,), np.float32), + 'time_validate': np.zeros((args.max_nrof_epochs,), np.float32), + 'time_evaluate': np.zeros((args.max_nrof_epochs,), np.float32), + 'prelogits_hist': np.zeros((args.max_nrof_epochs, 1000), np.float32), + } + for epoch in range(1,args.max_nrof_epochs+1): + step = sess.run(global_step, feed_dict=None) + # Train for one epoch + t = time.time() + cont = train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, + total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, + stat, cross_entropy_mean, accuracy, learning_rate, + prelogits, prelogits_center_loss, args.random_rotate, args.random_crop, args.random_flip, prelogits_norm, args.prelogits_hist_max, args.use_fixed_image_standardization) + stat['time_train'][epoch-1] = time.time() - t + + if not cont: + break + + t = time.time() + if len(val_image_list)>0 and ((epoch-1) % args.validate_every_n_epochs == args.validate_every_n_epochs-1 or epoch==args.max_nrof_epochs): + validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, + phase_train_placeholder, batch_size_placeholder, + stat, total_loss, regularization_losses, cross_entropy_mean, accuracy, args.validate_every_n_epochs, args.use_fixed_image_standardization) + stat['time_validate'][epoch-1] = time.time() - t + + # Save variables and the metagraph if it doesn't exist already + save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, epoch) + + # Evaluate on LFW + t = time.time() + if args.lfw_dir: + evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, 
batch_size_placeholder, control_placeholder,
+                    embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch,
+                    args.lfw_distance_metric, args.lfw_subtract_mean, args.lfw_use_flipped_images, args.use_fixed_image_standardization)
+            stat['time_evaluate'][epoch-1] = time.time() - t
+
+            print('Saving statistics')
+            with h5py.File(stat_file_name, 'w') as f:
+                for key, value in stat.items():
+                    f.create_dataset(key, data=value)
+
+    return model_dir
+
+def find_threshold(var, percentile):
+    hist, bin_edges = np.histogram(var, 100)
+    cdf = np.float32(np.cumsum(hist)) / np.sum(hist)
+    bin_centers = (bin_edges[:-1]+bin_edges[1:])/2
+    #plt.plot(bin_centers, cdf)
+    threshold = np.interp(percentile*0.01, cdf, bin_centers)
+    return threshold
+
+def filter_dataset(dataset, data_filename, percentile, min_nrof_images_per_class):
+    with h5py.File(data_filename,'r') as f:
+        distance_to_center = np.array(f.get('distance_to_center'))
+        label_list = np.array(f.get('label_list'))
+        image_list = np.array(f.get('image_list'))
+    distance_to_center_threshold = find_threshold(distance_to_center, percentile)
+    indices = np.where(distance_to_center>=distance_to_center_threshold)[0]
+    filtered_dataset = dataset
+    removelist = []
+    for i in indices:
+        label = label_list[i]
+        image = image_list[i]
+        if image in filtered_dataset[label].image_paths:
+            filtered_dataset[label].image_paths.remove(image)
+        if len(filtered_dataset[label].image_paths)<min_nrof_images_per_class:
+            removelist.append(label)
+
+    ix = sorted(list(set(removelist)), reverse=True)
+    for i in ix:
+        del(filtered_dataset[i])
+
+    return filtered_dataset
+
+def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder,
+      learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, step,
+      loss, train_op, summary_op, summary_writer, reg_losses, learning_rate_schedule_file,
+      stat, cross_entropy_mean, accuracy,
+      learning_rate, prelogits, prelogits_center_loss, random_rotate, random_crop, random_flip, prelogits_norm, prelogits_hist_max, use_fixed_image_standardization):
+    batch_number = 0
+
+    if args.learning_rate>0.0:
+        lr = args.learning_rate
+    else:
+        lr = facenet.get_learning_rate_from_file(learning_rate_schedule_file, epoch)
+
+    if lr<=0:
+        return False
+
+    index_epoch = sess.run(index_dequeue_op)
+    label_epoch = np.array(label_list)[index_epoch]
+    image_epoch = np.array(image_list)[index_epoch]
+
+    # Enqueue one epoch of image paths and labels
+    labels_array = np.expand_dims(np.array(label_epoch),1)
+    image_paths_array = np.expand_dims(np.array(image_epoch),1)
+    control_value = facenet.RANDOM_ROTATE * random_rotate + facenet.RANDOM_CROP * random_crop + facenet.RANDOM_FLIP * random_flip + facenet.FIXED_STANDARDIZATION * use_fixed_image_standardization
+    control_array = np.ones_like(labels_array) * control_value
+    sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array})
+
+    # Training loop
+    train_time = 0
+    while batch_number < args.epoch_size:
+        start_time = time.time()
+        feed_dict = {learning_rate_placeholder: lr, phase_train_placeholder:True, batch_size_placeholder:args.batch_size}
+        tensor_list = [loss, train_op, step, reg_losses, prelogits, cross_entropy_mean, learning_rate, prelogits_norm, accuracy, prelogits_center_loss]
+        if batch_number % 100 == 0:
+            loss_, _, step_, reg_losses_, prelogits_, cross_entropy_mean_, lr_, prelogits_norm_, accuracy_, center_loss_, summary_str = sess.run(tensor_list + [summary_op], feed_dict=feed_dict)
+            summary_writer.add_summary(summary_str, global_step=step_)
+        else:
+            loss_, _, step_, reg_losses_, prelogits_, cross_entropy_mean_, lr_, prelogits_norm_, accuracy_, center_loss_ = sess.run(tensor_list, feed_dict=feed_dict)
+
+        duration = time.time() - start_time
+        stat['loss'][step_-1] = loss_
+        stat['center_loss'][step_-1] = center_loss_
+        stat['reg_loss'][step_-1] = np.sum(reg_losses_)
+        stat['xent_loss'][step_-1] = cross_entropy_mean_
+        stat['prelogits_norm'][step_-1] = prelogits_norm_
+        stat['learning_rate'][epoch-1] = lr_
+        stat['accuracy'][step_-1] = accuracy_
+        stat['prelogits_hist'][epoch-1,:] += np.histogram(np.minimum(np.abs(prelogits_), prelogits_hist_max), bins=1000, range=(0.0, prelogits_hist_max))[0]
+
+        print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f\tXent %2.3f\tRegLoss %2.3f\tAccuracy %2.3f\tLr %2.5f\tCl %2.3f' %
+              (epoch, batch_number+1, args.epoch_size, duration, loss_, cross_entropy_mean_, np.sum(reg_losses_), accuracy_, lr_, center_loss_))
+        batch_number += 1
+        train_time += duration
+    # Add validation loss and accuracy to summary
+    summary = tf.Summary()
+    #pylint: disable=maybe-no-member
+    summary.value.add(tag='time/total', simple_value=train_time)
+    summary_writer.add_summary(summary, global_step=step_)
+    return True
+
+def validate(args, sess, epoch, image_list, label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder,
+      phase_train_placeholder, batch_size_placeholder,
+      stat, loss, regularization_losses, cross_entropy_mean, accuracy, validate_every_n_epochs, use_fixed_image_standardization):
+
+    print('Running forward pass on validation set')
+
+    nrof_batches = len(label_list) // args.lfw_batch_size
+    nrof_images = nrof_batches * args.lfw_batch_size
+
+    # Enqueue one epoch of image paths and labels
+    labels_array = np.expand_dims(np.array(label_list[:nrof_images]),1)
+    image_paths_array = np.expand_dims(np.array(image_list[:nrof_images]),1)
+    control_array = np.ones_like(labels_array, np.int32)*facenet.FIXED_STANDARDIZATION * use_fixed_image_standardization
+    sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array})
+
+    loss_array = np.zeros((nrof_batches,), np.float32)
+    xent_array = np.zeros((nrof_batches,), np.float32)
+    accuracy_array = np.zeros((nrof_batches,), np.float32)
+
+    # Validation loop
+    start_time = time.time()
+    for i in range(nrof_batches):
+        feed_dict = {phase_train_placeholder:False, batch_size_placeholder:args.lfw_batch_size}
+        loss_, cross_entropy_mean_, accuracy_ = sess.run([loss, cross_entropy_mean, accuracy], feed_dict=feed_dict)
+        loss_array[i], xent_array[i], accuracy_array[i] = (loss_, cross_entropy_mean_, accuracy_)
+        if i % 10 == 9:
+            print('.', end='')
+            sys.stdout.flush()
+    print('')
+
+    duration = time.time() - start_time
+
+    val_index = (epoch-1)//validate_every_n_epochs
+    stat['val_loss'][val_index] = np.mean(loss_array)
+    stat['val_xent_loss'][val_index] = np.mean(xent_array)
+    stat['val_accuracy'][val_index] = np.mean(accuracy_array)
+
+    print('Validation Epoch: %d\tTime %.3f\tLoss %2.3f\tXent %2.3f\tAccuracy %2.3f' %
+          (epoch, duration, np.mean(loss_array), np.mean(xent_array), np.mean(accuracy_array)))
+
+
+def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder,
+        embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer, stat, epoch, distance_metric, subtract_mean, use_flipped_images, use_fixed_image_standardization):
+    start_time = time.time()
+    # Run forward pass to calculate embeddings
+    print('Running forward pass on LFW images')
+
+    # Enqueue one epoch of image paths and labels
+    nrof_embeddings = len(actual_issame)*2  # nrof_pairs * nrof_images_per_pair
+    nrof_flips = 2 if use_flipped_images else 1
+    nrof_images = nrof_embeddings * nrof_flips
+    labels_array = np.expand_dims(np.arange(0,nrof_images),1)
+    image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1)
+    control_array = np.zeros_like(labels_array, np.int32)
+    if use_fixed_image_standardization:
+        control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION
+    if use_flipped_images:
+        # Flip every second image
+        control_array += (labels_array % 2)*facenet.FLIP
+    sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array})
+
+    embedding_size = int(embeddings.get_shape()[1])
+    assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size'
+    nrof_batches = nrof_images // batch_size
+    emb_array = np.zeros((nrof_images, embedding_size))
+    lab_array = np.zeros((nrof_images,))
+    for i in range(nrof_batches):
+        feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size}
+        emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict)
+        lab_array[lab] = lab
+        emb_array[lab, :] = emb
+        if i % 10 == 9:
+            print('.', end='')
+            sys.stdout.flush()
+    print('')
+    embeddings = np.zeros((nrof_embeddings, embedding_size*nrof_flips))
+    if use_flipped_images:
+        # Concatenate embeddings for the flipped and non-flipped versions of the images
+        embeddings[:,:embedding_size] = emb_array[0::2,:]
+        embeddings[:,embedding_size:] = emb_array[1::2,:]
+    else:
+        embeddings = emb_array
+
+    assert np.array_equal(lab_array, np.arange(nrof_images)), 'Wrong labels used for evaluation, possibly caused by training examples left in the input pipeline'
+    _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, actual_issame, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean)
+
+    print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy)))
+    print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
+    lfw_time = time.time() - start_time
+    # Add validation loss and accuracy to summary
+    summary = tf.Summary()
+    #pylint: disable=maybe-no-member
+    summary.value.add(tag='lfw/accuracy', simple_value=np.mean(accuracy))
+    summary.value.add(tag='lfw/val_rate', simple_value=val)
+    summary.value.add(tag='time/lfw', simple_value=lfw_time)
+    summary_writer.add_summary(summary, step)
+    with open(os.path.join(log_dir,'lfw_result.txt'),'at') as f:
+        f.write('%d\t%.5f\t%.5f\n' % (step, np.mean(accuracy), val))
+    stat['lfw_accuracy'][epoch-1] = np.mean(accuracy)
+    stat['lfw_valrate'][epoch-1] = val
+
+def save_variables_and_metagraph(sess, saver, summary_writer, model_dir, model_name, step):
+    # Save the model checkpoint
+    print('Saving variables')
+    start_time = time.time()
+    checkpoint_path = os.path.join(model_dir, 'model-%s.ckpt' % model_name)
+    saver.save(sess, checkpoint_path, global_step=step, write_meta_graph=False)
+    save_time_variables = time.time() - start_time
+    print('Variables saved in %.2f seconds' % save_time_variables)
+    metagraph_filename = os.path.join(model_dir, 'model-%s.meta' % model_name)
+    save_time_metagraph = 0
+    if not os.path.exists(metagraph_filename):
+        print('Saving metagraph')
+        start_time = time.time()
+        saver.export_meta_graph(metagraph_filename)
+        save_time_metagraph = time.time() - start_time
+        print('Metagraph saved in %.2f seconds' % save_time_metagraph)
+    summary = tf.Summary()
+    #pylint: disable=maybe-no-member
+    summary.value.add(tag='time/save_variables', simple_value=save_time_variables)
+    summary.value.add(tag='time/save_metagraph', simple_value=save_time_metagraph)
+    summary_writer.add_summary(summary, step)
+
+
+def parse_arguments(argv):
+    parser = argparse.ArgumentParser()
+
+
parser.add_argument('--logs_base_dir', type=str, + help='Directory where to write event logs.', default='~/logs/facenet') + parser.add_argument('--models_base_dir', type=str, + help='Directory where to write trained models and checkpoints.', default='~/models/facenet') + parser.add_argument('--gpu_memory_fraction', type=float, + help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0) + parser.add_argument('--pretrained_model', type=str, + help='Load a pretrained model before training starts.') + parser.add_argument('--data_dir', type=str, + help='Path to the data directory containing aligned face patches.', + default='~/datasets/casia/casia_maxpy_mtcnnalign_182_160') + parser.add_argument('--model_def', type=str, + help='Model definition. Points to a module containing the definition of the inference graph.', default='models.inception_resnet_v1') + parser.add_argument('--max_nrof_epochs', type=int, + help='Number of epochs to run.', default=500) + parser.add_argument('--batch_size', type=int, + help='Number of images to process in a batch.', default=90) + parser.add_argument('--image_size', type=int, + help='Image size (height, width) in pixels.', default=160) + parser.add_argument('--epoch_size', type=int, + help='Number of batches per epoch.', default=1000) + parser.add_argument('--embedding_size', type=int, + help='Dimensionality of the embedding.', default=128) + parser.add_argument('--random_crop', + help='Performs random cropping of training images. If false, the center image_size pixels from the training images are used. ' + + 'If the size of the images in the data directory is equal to image_size no cropping is performed', action='store_true') + parser.add_argument('--random_flip', + help='Performs random horizontal flipping of training images.', action='store_true') + parser.add_argument('--random_rotate', + help='Performs random rotations of training images.', action='store_true') + parser.add_argument('--use_fixed_image_standardization', + help='Performs fixed standardization of images.', action='store_true') + parser.add_argument('--keep_probability', type=float, + help='Keep probability of dropout for the fully connected layer(s).', default=1.0) + parser.add_argument('--weight_decay', type=float, + help='L2 weight regularization.', default=0.0) + parser.add_argument('--center_loss_factor', type=float, + help='Center loss factor.', default=0.0) + parser.add_argument('--center_loss_alfa', type=float, + help='Center update rate for center loss.', default=0.95) + parser.add_argument('--prelogits_norm_loss_factor', type=float, + help='Loss based on the norm of the activations in the prelogits layer.', default=0.0) + parser.add_argument('--prelogits_norm_p', type=float, + help='Norm to use for prelogits norm loss.', default=1.0) + parser.add_argument('--prelogits_hist_max', type=float, + help='The max value for the prelogits histogram.', default=10.0) + parser.add_argument('--optimizer', type=str, choices=['ADAGRAD', 'ADADELTA', 'ADAM', 'RMSPROP', 'MOM'], + help='The optimization algorithm to use', default='ADAGRAD') + parser.add_argument('--learning_rate', type=float, + help='Initial learning rate. 
If set to a negative value a learning rate '
+        'schedule can be specified in the file "learning_rate_schedule.txt"', default=0.1)
+    parser.add_argument('--learning_rate_decay_epochs', type=int,
+        help='Number of epochs between learning rate decay.', default=100)
+    parser.add_argument('--learning_rate_decay_factor', type=float,
+        help='Learning rate decay factor.', default=1.0)
+    parser.add_argument('--moving_average_decay', type=float,
+        help='Exponential decay for tracking of training parameters.', default=0.9999)
+    parser.add_argument('--seed', type=int,
+        help='Random seed.', default=666)
+    parser.add_argument('--nrof_preprocess_threads', type=int,
+        help='Number of preprocessing (data loading and augmentation) threads.', default=4)
+    parser.add_argument('--log_histograms',
+        help='Enables logging of weight/bias histograms in tensorboard.', action='store_true')
+    parser.add_argument('--learning_rate_schedule_file', type=str,
+        help='File containing the learning rate schedule that is used when learning_rate is set to -1.', default='data/learning_rate_schedule.txt')
+    parser.add_argument('--filter_filename', type=str,
+        help='File containing image data used for dataset filtering', default='')
+    parser.add_argument('--filter_percentile', type=float,
+        help='Keep only the percentile of images closest to their class center', default=100.0)
+    parser.add_argument('--filter_min_nrof_images_per_class', type=int,
+        help='Keep only the classes with this number of examples or more', default=0)
+    parser.add_argument('--validate_every_n_epochs', type=int,
+        help='Number of epochs between validation runs', default=5)
+    parser.add_argument('--validation_set_split_ratio', type=float,
+        help='The ratio of the total dataset to use for validation', default=0.0)
+    parser.add_argument('--min_nrof_val_images_per_class', type=float,
+        help='Classes with fewer images will be removed from the validation set', default=0)
+
+    # Parameters for validation on LFW
+    parser.add_argument('--lfw_pairs', type=str,
+        help='The file containing the pairs to use for validation.', default='data/pairs.txt')
+    parser.add_argument('--lfw_dir', type=str,
+        help='Path to the data directory containing aligned face patches.', default='')
+    parser.add_argument('--lfw_batch_size', type=int,
+        help='Number of images to process in a batch in the LFW test set.', default=100)
+    parser.add_argument('--lfw_nrof_folds', type=int,
+        help='Number of folds to use for cross validation. Mainly used for testing.', default=10)
+    parser.add_argument('--lfw_distance_metric', type=int,
+        help='Type of distance metric to use. 0: Euclidean, 1: Cosine similarity distance.', default=0)
+    parser.add_argument('--lfw_use_flipped_images',
+        help='Concatenates embeddings for the image and its horizontally flipped counterpart.', action='store_true')
+    parser.add_argument('--lfw_subtract_mean',
+        help='Subtract feature mean before calculating distance.', action='store_true')
+    return parser.parse_args(argv)
+
+
+if __name__ == '__main__':
+    main(parse_arguments(sys.argv[1:]))
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/train_tripletloss.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/train_tripletloss.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e986604b10b7ead2558410d71507733c34b2d0e
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/src/train_tripletloss.py
@@ -0,0 +1,667 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Training a face recognizer with TensorFlow based on the FaceNet paper +FaceNet: A Unified Embedding for Face Recognition and Clustering: http://arxiv.org/abs/1503.03832 +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
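train_tripletloss.py minimizes facenet.triplet_loss over (anchor, positive, negative) embedding triplets with margin alpha. That helper is defined outside this diff; the sketch below shows the FaceNet formulation it is assumed to follow, L = mean(max(0, ||a - p||^2 - ||a - n||^2 + alpha)):

import tensorflow as tf

def triplet_loss_sketch(anchor, positive, negative, alpha):
    # Squared L2 distances between anchor-positive and anchor-negative pairs.
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)
    # Hinge on the margin: positives should sit at least alpha closer than negatives.
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
    return tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)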
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from npu_bridge.npu_init import * + +from datetime import datetime +import os.path +import time +import sys +import tensorflow as tf +import numpy as np +import importlib +import itertools +import argparse +import facenet +import lfw + +from tensorflow.python.ops import data_flow_ops + +from six.moves import xrange # @UnresolvedImport + +def main(args): + + network = importlib.import_module(args.model_def) + + ''' + subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') + log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) + if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist + os.makedirs(log_dir) + model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) + ''' + ################################################################################ + log_dir = os.path.expanduser(args.logs_base_dir) + if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist + os.makedirs(log_dir) + model_dir = os.path.expanduser(args.models_base_dir) + ################################################################################ + if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist + os.makedirs(model_dir) + + # Write arguments to a text file + facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) + + # Store some git revision info in a text file in the log directory + src_path,_ = os.path.split(os.path.realpath(__file__)) + facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) + + np.random.seed(seed=args.seed) + train_set = facenet.get_dataset(args.data_dir) + + print('Model directory: %s' % model_dir) + print('Log directory: %s' % log_dir) + if args.pretrained_model: + print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) + + if args.lfw_dir: + print('LFW directory: %s' % args.lfw_dir) + # Read the file containing the pairs used for testing + pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) + # Get the paths for the corresponding images + lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) + + + with tf.Graph().as_default(): + tf.set_random_seed(args.seed) + global_step = tf.Variable(0, trainable=False) + + # Placeholder for the learning rate + learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') + + batch_size_placeholder = tf.placeholder(tf.int64, name='batch_size') + + phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') + + image_paths_placeholder = tf.placeholder(tf.string, shape=(None,3), name='image_paths') + labels_placeholder = tf.placeholder(tf.int64, shape=(None,3), name='labels') + + ''' + # In order to suit the npu enviroments + input_queue = data_flow_ops.FIFOQueue(capacity=100000, + dtypes=[tf.string, tf.int64], + shapes=[(3,), (3,)], + shared_name=None, name=None) + enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder]) + + nrof_preprocess_threads = 4 + images_and_labels = [] + for _ in range(nrof_preprocess_threads): + filenames, label = input_queue.dequeue() + images = [] + for filename in tf.unstack(filenames): + file_contents = tf.read_file(filename) + image = tf.image.decode_image(file_contents, channels=3) + + if args.random_crop: + image = tf.random_crop(image, [args.image_size, args.image_size, 3]) + else: + image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, 
args.image_size) + if args.random_flip: + image = tf.image.random_flip_left_right(image) + + #pylint: disable=no-member + image.set_shape((args.image_size, args.image_size, 3)) + images.append(tf.image.per_image_standardization(image)) + images_and_labels.append([images, label]) + + image_batch, labels_batch = tf.train.batch_join( + images_and_labels, batch_size=batch_size_placeholder, + shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, + capacity=4 * nrof_preprocess_threads * args.batch_size, + allow_smaller_final_batch=True) + ''' + ########################################################################################### + dataset = tf.data.Dataset.from_tensor_slices((image_paths_placeholder, labels_placeholder)) + dataset = dataset.map(lambda filenames, label: parser_data(filenames, label, args.random_crop, args.random_flip, + args.image_size)) + dataset = dataset.unbatch() + dataset = dataset.batch(batch_size=batch_size_placeholder, drop_remainder=True) + iterator = tf.data.make_initializable_iterator(dataset) + image_batch, labels_batch = iterator.get_next() + ########################################################################################### + + image_batch = tf.identity(image_batch, 'image_batch') + image_batch = tf.identity(image_batch, 'input') + labels_batch = tf.identity(labels_batch, 'label_batch') + + # Build the inference graph + prelogits, _ = network.inference(image_batch, args.keep_probability, + phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, + weight_decay=args.weight_decay) + + embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') + # Split embeddings into anchor, positive and negative and calculate triplet loss + anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1,3,args.embedding_size]), 3, 1) + triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha) + + learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, + args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) + tf.summary.scalar('learning_rate', learning_rate) + + # Calculate the total losses + regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss') + + # Build a Graph that trains the model with one batch of examples and updates the model parameters + train_op = facenet.train(total_loss, global_step, args.optimizer, + learning_rate, args.moving_average_decay, tf.global_variables()) + + # Create a saver + saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) + + # Build the summary operation based on the TF collection of Summaries. + summary_op = tf.summary.merge_all() + + ''' + # Start running operations on the Graph. 
+ gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) + ''' + ########################################################################################### + config = tf.ConfigProto() + custom_op = config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["mix_compile_mode"].b = True + config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + sess = tf.Session(config=npu_config_proto(config_proto=config)) + ########################################################################################### + + # Initialize variables + sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder:True}) + sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder:True}) + + summary_writer = tf.summary.FileWriter(log_dir, sess.graph) + + ''' + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=sess) + ''' + + with sess.as_default(): + + if args.pretrained_model: + print('Restoring pretrained model: %s' % args.pretrained_model) + ckpt_file = tf.train.latest_checkpoint(args.pretrained_model) + saver.restore(sess, ckpt_file) + + # Training and validation loop + epoch = 0 + while epoch < args.max_nrof_epochs: + step = sess.run(global_step, feed_dict=None) + epoch = step // args.epoch_size + # Train for one epoch + ''' + train(args, sess, train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, + embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file, + args.embedding_size, anchor, positive, negative, triplet_loss) + ''' + ########################################################################################### + train(args, sess, train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, global_step, + embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file, + args.embedding_size, anchor, positive, negative, triplet_loss, iterator) + ########################################################################################### + + # Save variables and the metagraph if it doesn't exist already + save_variables_and_metagraph(sess, saver, summary_writer, model_dir, step) + + # Evaluate on LFW + if args.lfw_dir: + ''' + evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, args.batch_size, + args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size) + ''' + ########################################################################################### + evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, actual_issame, + args.batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, + args.embedding_size, iterator) + ########################################################################################### + + return model_dir + + +def parser_data(filenames, label, random_crop, random_flip, image_size): + + 
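+    # Each input element holds one triplet: `filenames` and `label` have shape
+    # (3,). The loop below decodes the three images, applies the optional
+    # crop/flip augmentation, and standardizes them; main() flattens the
+    # triplet dimension again with dataset.unbatch() before re-batching.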
images = [] + for filename in tf.unstack(filenames): + file_contents = tf.read_file(filename) + image = tf.image.decode_image(file_contents, channels=3, dtype=tf.float32) + + if random_crop: + image = tf.random_crop(image, [image_size, image_size, 3]) + else: + image = tf.image.resize_with_crop_or_pad(image, image_size, image_size) + if random_flip: + image = tf.image.random_flip_left_right(image) + + # pylint: disable=no-member + image.set_shape((image_size, image_size, 3)) + images.append(tf.image.per_image_standardization(image)) + + return [images, label] + +''' +def train(args, sess, dataset, epoch, image_paths_placeholder, labels_placeholder, labels_batch, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, + embeddings, loss, train_op, summary_op, summary_writer, learning_rate_schedule_file, + embedding_size, anchor, positive, negative, triplet_loss): +''' +def train(args, sess, dataset, epoch, image_paths_placeholder, labels_placeholder, labels_batch, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, global_step, + embeddings, loss, train_op, summary_op, summary_writer, learning_rate_schedule_file, + embedding_size, anchor, positive, negative, triplet_loss,iterator): + + batch_number = 0 + + if args.learning_rate>0.0: + lr = args.learning_rate + else: + lr = facenet.get_learning_rate_from_file(learning_rate_schedule_file, epoch) + while batch_number < args.epoch_size: + # Sample people randomly from the dataset + # image_paths, num_per_class = sample_people(dataset, args.people_per_batch, args.images_per_person) + ########################################################################################### + image_paths, num_per_class = sample_people(dataset, args.people_per_batch, + args.images_per_person, args.batch_size) + ########################################################################################### + + print('Running forward pass on sampled images: ', end='') + start_time = time.time() + + nrof_examples = args.people_per_batch * args.images_per_person + #labels_array = np.reshape(np.arange(nrof_examples), (-1, 3)) + ########################################################################################### + j = int(np.floor(nrof_examples / args.batch_size)) + nrof_examples = j * args.batch_size + labels_array = np.reshape(np.arange(nrof_examples,dtype=np.int64), (-1, 3)) + ########################################################################################### + + image_paths_array = np.reshape(np.expand_dims(np.array(image_paths),1), (-1,3)) + + #sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array}) + ########################################################################################### + sess.run(iterator.initializer, feed_dict={image_paths_placeholder: image_paths_array, + labels_placeholder: labels_array, + batch_size_placeholder: args.batch_size}) + ########################################################################################### + + emb_array = np.zeros((nrof_examples, embedding_size)) + + #nrof_batches = int(np.ceil(nrof_examples / args.batch_size)) + # for i in range(nrof_batches): + # batch_size = min(nrof_examples-i*args.batch_size, args.batch_size) + # emb, lab = sess.run([embeddings, labels_batch], feed_dict={batch_size_placeholder: batch_size, + # learning_rate_placeholder: lr, phase_train_placeholder: True}) + # emb_array[lab,:] = emb + 
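+        # With drop_remainder=True in the tf.data pipeline, only full batches
+        # are produced, so exactly j = floor(nrof_examples/batch_size) batches
+        # are run here (the ceil-based loop above is kept for reference only).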
########################################################################################### + nrof_batches = j + for i in range(nrof_batches): + emb, lab = sess.run([embeddings, labels_batch], feed_dict={learning_rate_placeholder: lr, + phase_train_placeholder: True}) + emb_array[lab, :] = emb + ########################################################################################### + + print('%.3f' % (time.time()-start_time)) + + # Select triplets based on the embeddings + print('Selecting suitable triplets for training') + triplets, nrof_random_negs, nrof_triplets = select_triplets(emb_array, num_per_class, + image_paths, args.people_per_batch, args.alpha) + selection_time = time.time() - start_time + print('(nrof_random_negs, nrof_triplets) = (%d, %d): time=%.3f seconds' % + (nrof_random_negs, nrof_triplets, selection_time)) + + # Perform training on the selected triplets + # nrof_batches = int(np.ceil(nrof_triplets*3/args.batch_size)) + ########################################################################################### + nrof_batches = int(np.floor(nrof_triplets * 3 / args.batch_size)) + ########################################################################################### + + triplet_paths = list(itertools.chain(*triplets)) + labels_array = np.reshape(np.arange(len(triplet_paths)),(-1,3)) + triplet_paths_array = np.reshape(np.expand_dims(np.array(triplet_paths),1), (-1,3)) + + #sess.run(enqueue_op, {image_paths_placeholder: triplet_paths_array, labels_placeholder: labels_array}) + ########################################################################################### + feed_dict = {image_paths_placeholder: triplet_paths_array, labels_placeholder: labels_array, + batch_size_placeholder: args.batch_size} + sess.run(iterator.initializer, feed_dict=feed_dict) + ########################################################################################### + + + nrof_examples = len(triplet_paths) + train_time = 0 + i = 0 + emb_array = np.zeros((nrof_examples, embedding_size)) + + #loss_array = np.zeros((nrof_triplets,)) + ########################################################################################### + loss_array = np.zeros(int(np.floor(nrof_examples / 3)),) + ########################################################################################### + + summary = tf.Summary() + step = 0 + while i < nrof_batches: + start_time = time.time() + + #batch_size = min(nrof_examples-i*args.batch_size, args.batch_size) + #feed_dict = {batch_size_placeholder: batch_size, learning_rate_placeholder: lr, phase_train_placeholder: True} + #err, _, step, emb, lab = sess.run([loss, train_op, global_step, embeddings, labels_batch], feed_dict=feed_dict) + ########################################################################################### + feed_dict = {learning_rate_placeholder: lr, phase_train_placeholder: True} + err, _, step, emb, lab = sess.run([loss, train_op, global_step, embeddings, + labels_batch], feed_dict=feed_dict) + ########################################################################################### + + emb_array[lab,:] = emb + loss_array[i] = err + duration = time.time() - start_time + print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f' % + (epoch, batch_number+1, args.epoch_size, duration, err)) + batch_number += 1 + i += 1 + train_time += duration + summary.value.add(tag='loss', simple_value=err) + + # Add validation loss and accuracy to summary + #pylint: disable=maybe-no-member + summary.value.add(tag='time/selection', simple_value=selection_time) + 
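+        # A single tf.Summary accumulates the per-batch loss values plus the
+        # triplet-selection time; it is written out once with the latest step.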
summary_writer.add_summary(summary, step)
+    return step
+
+
+def select_triplets(embeddings, nrof_images_per_class, image_paths, people_per_batch, alpha):
+    """ Select the triplets for training
+    """
+    trip_idx = 0
+    emb_start_idx = 0
+    num_trips = 0
+    triplets = []
+
+    # VGG Face: Choosing good triplets is crucial and should strike a balance between
+    # selecting informative (i.e. challenging) examples and swamping training with examples that
+    # are too hard. This is achieved by extending each pair (a, p) to a triplet (a, p, n) by sampling
+    # the image n at random, but only between the ones that violate the triplet loss margin. The
+    # latter is a form of hard-negative mining, but it is not as aggressive (and much cheaper) than
+    # choosing the maximally violating example, as often done in structured output learning.
+
+    for i in xrange(people_per_batch):
+        nrof_images = int(nrof_images_per_class[i])
+        for j in xrange(1,nrof_images):
+            a_idx = emb_start_idx + j - 1
+            neg_dists_sqr = np.sum(np.square(embeddings[a_idx] - embeddings), 1)
+            for pair in xrange(j, nrof_images): # For every possible positive pair.
+                p_idx = emb_start_idx + pair
+                pos_dist_sqr = np.sum(np.square(embeddings[a_idx]-embeddings[p_idx]))
+                neg_dists_sqr[emb_start_idx:emb_start_idx+nrof_images] = np.NaN
+                #all_neg = np.where(np.logical_and(neg_dists_sqr-pos_dist_sqr<alpha, pos_dist_sqr<neg_dists_sqr))[0]  # FaceNet selection
+                all_neg = np.where(neg_dists_sqr-pos_dist_sqr<alpha)[0] # VGG Face selection
+                nrof_random_negs = all_neg.shape[0]
+                if nrof_random_negs>0:
+                    rnd_idx = np.random.randint(nrof_random_negs)
+                    n_idx = all_neg[rnd_idx]
+                    triplets.append((image_paths[a_idx], image_paths[p_idx], image_paths[n_idx]))
+                    #print('Triplet %d: (%d, %d, %d), pos_dist=%2.6f, neg_dist=%2.6f (%d, %d, %d, %d, %d)' %
+                    #    (trip_idx, a_idx, p_idx, n_idx, pos_dist_sqr, neg_dists_sqr[n_idx], nrof_random_negs, rnd_idx, i, j, emb_start_idx))
+                    trip_idx += 1
+
+                num_trips += 1
+
+        emb_start_idx += nrof_images
+
+    np.random.shuffle(triplets)
+    return triplets, num_trips, len(triplets)
+
+# def sample_people(dataset, people_per_batch, images_per_person):
+def sample_people(dataset, people_per_batch, images_per_person, batch_size):
+    nrof_images = people_per_batch * images_per_person
+    ###########################################################################################
+    j = int(np.floor(nrof_images / batch_size))
+    nrof_images = j * batch_size
+    ###########################################################################################
+
+    # Sample classes from the dataset
+    nrof_classes = len(dataset)
+    class_indices = np.arange(nrof_classes)
+    np.random.shuffle(class_indices)
+
+    i = 0
+    image_paths = []
+    num_per_class = []
+    sampled_class_indices = []
+    # Sample images from these classes until we have enough
+    # (loop body below restored from the upstream facenet sample_people implementation)
+    while len(image_paths)<nrof_images:
+        class_index = class_indices[i]
+        nrof_images_in_class = len(dataset[class_index])
+        image_indices = np.arange(nrof_images_in_class)
+        np.random.shuffle(image_indices)
+        nrof_images_from_class = min(nrof_images_in_class, images_per_person, nrof_images-len(image_paths))
+        idx = image_indices[0:nrof_images_from_class]
+        image_paths_for_class = [dataset[class_index].image_paths[j] for j in idx]
+        sampled_class_indices += [class_index]*nrof_images_from_class
+        image_paths += image_paths_for_class
+        num_per_class.append(nrof_images_from_class)
+        i+=1
+
+    return image_paths, num_per_class
+    echo " "
+    echo "parameter explain:
+    --data_path              # dataset of training
+    --output_path            # output of training
+    --train_steps            # max_step for training
+    --train_epochs           # max_epoch for training
+    --batch_size             # batch size
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# 参数校验,不需要修改
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --output_path* ]];then
+        output_path=`echo ${para#*=}`
+    elif [[ $para == --train_steps* ]];then
+        train_steps=`echo ${para#*=}`
+    elif [[ $para == --train_epochs* ]];then
+        train_epochs=`echo ${para#*=}`
+    elif [[ $para == --batch_size* ]];then
+        batch_size=`echo ${para#*=}`
+    fi
+done
+
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+# 校验是否传入output_path,不需要修改
+if [[ $output_path == "" ]];then
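+    # fall back to the per-device output directory when --output_path is not set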
output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} +if [ x"${modelarts_flag}" != x ]; +then + echo "running without etp..." + print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 + +if [ x"${modelarts_flag}" != x ]; +then + python3.7 ./train_cnn_trajectory_2d.py \ + --MAT_folder ${data_path}original_data/MOT17Det/mat/ \ + --temp_folder ${output_path}temp/ \ + --triplet_model ${data_path}model_data/20211209-124102/ \ + --save_dir ${output_path}model_data/traj/model.ckpt \ + --max_step 2001 \ + --output_path ${output_path} + +else + python3.7 ./train_cnn_trajectory_2d.py \ + --MAT_folder ${data_path}original_data/MOT17Det/mat/ \ + --temp_folder ${output_path}temp/ \ + --triplet_model ${data_path}model_data/20211209-124102/ \ + --save_dir ${output_path}model_data/traj/ \ + --max_step 2001 \ + --output_path ${output_path} +fi + +# 性能相关数据计算 +StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` + +# 精度相关数据计算 +train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'` +# 提取所有loss打印信息 +grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." 
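+    # (note: the NPU verdict above comes from grepping ${print_log} for the
+    #  fixed message logged when the graph is compiled for the Ascend device)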
+ echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/test/train_performance_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..73af1fb0a74148599814f570cec836c3b103438c --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/test/train_performance_1p.sh @@ -0,0 +1,195 @@ +#!/bin/bash +export LD_PRELOAD='/usr/lib64/libgomp.so.1:/usr/libexec/coreutils/libstdbuf.so' +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## +# shell脚本所在路径 +cur_path=`echo $(cd $(dirname $0);pwd)` + +# 判断当前shell是否是performance +perf_flag=`echo $0 | grep performance | wc -l` + +# 当前执行网络的名称 +Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'` + +export RANK_SIZE=1 +export RANK_ID=0 +export JOB_ID=10087 + +# 路径参数初始化 +data_path="" +output_path="" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --data_path # dataset of training + --output_path # output of training + --train_steps # max_step for training + --train_epochs # max_epoch for training + --batch_size # batch size + -h/--help show help message + " + exit 1 +fi + +# 参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --output_path* ]];then + output_path=`echo ${para#*=}` + elif [[ $para == --train_steps* ]];then + train_steps=`echo ${para#*=}` + elif [[ $para == --train_epochs* ]];then + train_epochs=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo 
"[Error] para \"data_path\" must be config" + exit 1 +fi + +# 校验是否传入output_path,不需要修改 +if [[ $output_path == "" ]];then + output_path="./test/output/${ASCEND_DEVICE_ID}" +fi + +# 设置打屏日志文件名,请保留,文件名为${print_log} +print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" +modelarts_flag=${MODELARTS_MODEL_PATH} +if [ x"${modelarts_flag}" != x ]; +then + echo "running without etp..." + print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank` + print_log="/home/ma-user/modelarts/log/${print_log_name}" +fi +echo "### get your log here : ${print_log}" + +CaseName="" +function get_casename() +{ + if [ x"${perf_flag}" = x1 ]; + then + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf' + else + CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc' + fi +} + +# 跳转到code目录 +cd ${cur_path}/../ +rm -rf ./test/output/${ASCEND_DEVICE_ID} +mkdir -p ./test/output/${ASCEND_DEVICE_ID} + +# 训练开始时间记录,不需要修改 +start_time=$(date +%s) +########################################################## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +#########第3行 至 100行,请一定不要、不要、不要修改########## +########################################################## + +#========================================================= +#========================================================= +#========训练执行命令,需要根据您的网络进行修改============== +#========================================================= +#========================================================= +# 基础参数,需要模型审视修改 +# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取 +# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取 +# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值 + +if [ x"${modelarts_flag}" != x ]; +then + python3.7 ./train_cnn_trajectory_2d.py \ + --MAT_folder ${data_path}original_data/MOT17Det/mat/ \ + --temp_folder ${output_path}temp/ \ + --triplet_model ${data_path}model_data/20211209-124102/ \ + --save_dir ${output_path}model_data/traj/model.ckpt \ + --max_step 15 \ + --output_path ${output_path} + +else + python3.7 ./train_cnn_trajectory_2d.py \ + --MAT_folder ${data_path}original_data/MOT17Det/mat/ \ + --temp_folder ${output_path}temp/ \ + --triplet_model ${data_path}model_data/20211209-124102/ \ + --save_dir ${output_path}model_data/traj/ \ + --max_step 15 \ + --output_path ${output_path} +fi + +# 性能相关数据计算 +StepTime=`grep "sec/step :" ${print_log} | tail -n 10 | awk '{print $NF}' | awk '{sum+=$1} END {print sum/NR}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'` + +# 精度相关数据计算 +train_accuracy=`grep "Final Accuracy accuracy" ${print_log} | awk '{print $NF}'` +# 提取所有loss打印信息 +grep "loss :" ${print_log} | awk -F ":" '{print $4}' | awk -F "-" '{print $1}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt + + +########################################################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +#########后面的所有内容请不要修改########################### +########################################################### + +# 判断本次执行是否正确使用Ascend NPU +use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l` +if [ x"${use_npu_flag}" == x0 ]; +then + echo "------------------ ERROR NOTICE START ------------------" + echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration." + echo "------------------ ERROR NOTICE END------------------" +else + echo "------------------ INFO NOTICE START------------------" + echo "INFO, your task have used Ascend NPU, please check your result." 
+ echo "------------------ INFO NOTICE END------------------" +fi + +# 获取最终的casename,请保留,case文件名为${CaseName} +get_casename + +# 重命名loss文件 +if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ]; +then + mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt +fi + +# 训练端到端耗时 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +# 输出性能FPS/单step耗时/端到端耗时 +echo "Final Performance images/sec : $FPS" +echo "Final Performance sec/step : $StepTime" +echo "E2E Training Duration sec : $e2e_time" + +# 输出训练精度 +echo "Final Train Accuracy : ${train_accuracy}" + +# 最后一个迭代loss值,不需要修改 +ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`) + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/track_lib.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/track_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..c6ac274d02d16890cb607e388aced790d98f5958 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/track_lib.py @@ -0,0 +1,962 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +/* + * Copyright ©2019 Gaoang Wang. All rights reserved. Permission is + * hereby granted for academic use. No other use, copying, distribution, or modification + * is permitted without prior written consent. 
Copyrights for + * third-party components of this work must be honored. Instructors + * interested in reusing these course materials should contact the + * author. + */ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import numpy as np +import argparse +import facenet +import lfw +import os +import sys +import cv2 +import pickle +import time +from functools import wraps + +from tensorflow.python.ops import data_flow_ops +from sklearn import metrics +from scipy.optimize import brentq +from scipy import interpolate +from scipy.interpolate import interp1d +from scipy.io import loadmat +from scipy import misc +from scipy import stats +from scipy.spatial import distance +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from PIL import Image + +from skimage import data +from skimage.color import rgb2gray +from skimage.feature import match_descriptors, ORB, plot_matches +from skimage.measure import ransac +from skimage.transform import FundamentalMatrixTransform +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF, ConstantKernel, PairwiseKernel, DotProduct, RationalQuadratic +from sklearn.decomposition import SparseCoder + +def tracklet_classify(A, pca, D, knn, clf_coding): + encode_fea = np.zeros((len(A),len(D))) + for n in range(len(A)): + pca_fea = pca.transform(A[n]) + dist = distance.cdist(pca_fea, D, 'euclidean') + x = np.zeros((len(pca_fea),len(D))) + for k in range(len(dist)): + sort_idx = np.argsort(dist[k,:]) + temp_D = D[sort_idx[0:knn],:] + temp_coder = SparseCoder(dictionary=temp_D, transform_n_nonzero_coefs=10, + transform_alpha=0.05, transform_algorithm='lasso_lars') + #import pdb; pdb.set_trace() + xx = np.zeros((1,D.shape[1])) + xx[:,:] = pca_fea[k,:] + temp_x = temp_coder.transform(xx) + x[k,sort_idx[0:knn]] = temp_x + + encode_fea[n,:] = np.max(x, axis=0) + pred_set_label = clf_coding.predict(encode_fea) + return pred_set_label + +def interp_batch(total_batch_x): + interp_batch_x = total_batch_x.copy() + N_batch = total_batch_x.shape[0] + for n in range(N_batch): + temp_idx = np.where(total_batch_x[n,0,:,1]==1)[0] + t1 = int(temp_idx[-1]) + temp_idx = np.where(total_batch_x[n,0,:,2]==1)[0] + t2 = int(temp_idx[0]) + if t2-t1<=1: + continue + interp_t = np.array(range(t1+1,t2)) + for k in range(total_batch_x.shape[1]): + #temp_std = np.std(total_batch_x[n,k,total_batch_x[n,k,:,0]!=0,0]) + + temp_std1 = np.std(total_batch_x[n,k,total_batch_x[n,0,:,1]!=0,0]) + temp_std2 = np.std(total_batch_x[n,k,total_batch_x[n,0,:,2]!=0,0]) + + x_p = [t1,t2] + f_p = [total_batch_x[n,k,t1,0],total_batch_x[n,k,t2,0]] + #interp_batch_x[n,k,t1+1:t2,0] = np.interp(interp_t,x_p,f_p)#+np.random.normal(0, temp_std, t2-t1-1) + interp_batch_x[n,k,t1+1:t2,0] = np.interp(interp_t,x_p,f_p)+np.random.normal(0, (temp_std1+temp_std2)*0.5, t2-t1-1) + return interp_batch_x + +def GP_regression(tr_x,tr_y,test_x): + A = np.ones((len(tr_x),2)) + A[:,0] = tr_x[:,0] + p = np.matmul(np.linalg.pinv(A),tr_y) + mean_tr_y = np.matmul(A,p) + A = np.ones((len(test_x),2)) + A[:,0] = test_x[:,0] + mean_test_y = np.matmul(A,p) + kernel = ConstantKernel(100,(1e-5, 1e5))*RBF(1, (1e-5, 1e5))+RBF(1, (1e-5, 1e5)) + gp = GaussianProcessRegressor(kernel=kernel, alpha=1, n_restarts_optimizer=9) + gp.fit(tr_x, tr_y-mean_tr_y) + test_y, sigma = gp.predict(test_x, return_std=True) + test_y = test_y+mean_test_y + #import pdb; pdb.set_trace() + return test_y + +def 
show_trajectory(tracklet_mat, obj_id): + max_len = 60 + check_fr = np.where(tracklet_mat['xmin_mat'][obj_id,:]!=-1)[0] + + test_xmin = tracklet_mat['xmin_mat'][obj_id,:].copy() + test_ymin = tracklet_mat['ymin_mat'][obj_id,:].copy() + test_xmax = tracklet_mat['xmax_mat'][obj_id,:].copy() + test_ymax = tracklet_mat['ymax_mat'][obj_id,:].copy() + + t1 = max(0,check_fr[0]-max_len) + t2 = min(check_fr[-1]+max_len,tracklet_mat['xmin_mat'].shape[1]-1) + test_t = np.concatenate((np.array(range(t1,check_fr[0])),np.array(range(check_fr[-1],t2)))) + test_t = test_t.astype(int) + aa = np.zeros((len(check_fr),1)) + bb = np.zeros((len(check_fr),1)) + cc = np.zeros((len(test_t),1)) + aa[:,0] = check_fr + cc[:,0] = test_t + bb[:,0] = tracklet_mat['xmin_mat'][obj_id,check_fr] + dd = GP_regression(aa,bb,cc) + #import pdb; pdb.set_trace() + test_xmin[test_t] = dd[:,0] + bb[:,0] = tracklet_mat['ymin_mat'][obj_id,check_fr] + dd = GP_regression(aa,bb,cc) + test_ymin[test_t] = dd[:,0] + bb[:,0] = tracklet_mat['xmax_mat'][obj_id,check_fr] + dd = GP_regression(aa,bb,cc) + test_xmax[test_t] = dd[:,0] + bb[:,0] = tracklet_mat['ymax_mat'][obj_id,check_fr] + dd = GP_regression(aa,bb,cc) + test_ymax[test_t] = dd[:,0] + + t_range = np.where(test_xmin!=-1)[0] + #if obj_id==2: + # import pdb; pdb.set_trace() + plt.plot(t_range,test_xmin[t_range],'k.',t_range,test_ymin[t_range],'k.', + t_range,test_xmax[t_range],'k.',t_range,test_ymax[t_range],'k.', + check_fr,tracklet_mat['xmin_mat'][obj_id,check_fr],'b.',check_fr,tracklet_mat['ymin_mat'][obj_id,check_fr],'r.', + check_fr,tracklet_mat['xmax_mat'][obj_id,check_fr],'g.',check_fr,tracklet_mat['ymax_mat'][obj_id,check_fr],'y.') + plt.show() + #import pdb; pdb.set_trace() + #plt.close('all') + return + +def remove_det(det_M, det_thresh, y_thresh, h_thresh, y_thresh2, ratio_1, h_thresh2, y_thresh3, y_thresh4): + + remove_idx = [] + + # remove low det score + for n in range(len(det_M)): + if det_M[n,-1]y_thresh2: + remove_idx.append(n) + + # remove thin objects + for n in range(len(det_M)): + if (det_M[n,4]/det_M[n,3])>ratio_1: + remove_idx.append(n) + + # remove small object + for n in range(len(det_M)): + if det_M[n,4]h_thresh2: + remove_idx.append(n) + + # remove ymax + for n in range(len(det_M)): + if det_M[n,2]+det_M[n,4]>y_thresh3: + remove_idx.append(n) + + # remove ymax + for n in range(len(det_M)): + if det_M[n,2]+det_M[n,4]=N_fr-1: + continue + start_fr = time_interval[1]+1-min_len + end_fr = time_interval[1]+1 + ext_fr = min(time_interval[1]+extend_len+1,N_fr-1) + + A = np.ones((min_len,2)) + A[:,0] = np.array(range(start_fr,end_fr)) + w[:,0] = xmaxs[start_fr:end_fr]-xmins[start_fr:end_fr] + h[:,0] = ymaxs[start_fr:end_fr]-ymins[start_fr:end_fr] + mean_w = 0 + mean_h = 0 + if drt==0: + mean_w = np.mean(w[int(min_len/2):,0]) + mean_h = np.mean(h[int(min_len/2):,0]) + else: + mean_w = np.mean(w[0:int(min_len/2),0]) + mean_h = np.mean(h[0:int(min_len/2),0]) + + dist1 = (ymins[int((start_fr+end_fr)/2)]+ymaxs[int((start_fr+end_fr)/2)])/2 + dist2 = (xmins[int((start_fr+end_fr)/2)]+xmaxs[int((start_fr+end_fr)/2)])/2 + v_flag = 0 #top + h_flag = 0 #left + if dist1>img_size[1]/2: + dist1 = img_size[1]-dist1 + v_flag = 1 + if dist2>img_size[0]/2: + dist2 = img_size[0]-dist2 + h_flag = 1 + # top bnd + if dist1=dist2 and h_flag==0: + x_center[:,0] = (xmaxs[start_fr:end_fr]-mean_w/2) + y_center[:,0] = (ymins[start_fr:end_fr]+ymaxs[start_fr:end_fr])/2 + # right bnd + elif dist1>=dist2 and h_flag==1: + x_center[:,0] = (xmins[start_fr:end_fr]+mean_w/2) + y_center[:,0] = 
(ymins[start_fr:end_fr]+ymaxs[start_fr:end_fr])/2 + + #x_center[:,0] = (xmins[start_fr:end_fr]+xmaxs[start_fr:end_fr])/2 + #y_center[:,0] = (ymins[start_fr:end_fr]+ymaxs[start_fr:end_fr])/2 + #if fr_id==10: + # import pdb; pdb.set_trace() + px = np.matmul(np.linalg.pinv(A),x_center) + err_x = np.sum(np.absolute(np.matmul(A,px)-x_center)/mean_w)/min_len + if err_x>reg_thresh: # trajectory cannot be predicted + continue + py = np.matmul(np.linalg.pinv(A),y_center) + err_y = np.sum(np.absolute(np.matmul(A,py)-y_center)/mean_h)/min_len + if err_y>reg_thresh: # trajectory cannot be predicted + continue + + # slow motion check + static_flag = 0 + diff_x = abs((xmins[time_interval[1]]+xmaxs[time_interval[1]])/2-(xmins[time_interval[0]]+xmaxs[time_interval[0]])/2) + diff_y = abs((ymins[time_interval[1]]+ymaxs[time_interval[1]])/2-(ymins[time_interval[0]]+ymaxs[time_interval[0]])/2) + speed = np.sqrt(np.power(diff_x,2)+np.power(diff_y,2))/(time_interval[1]-time_interval[0]+1) + if speedstatic_len: # static person + static_flag = 1 + + #ext_fr = max(time_interval[0]-extend_len,0) + if static_flag==1: + mean_x = np.mean(x_center[:,0]) + mean_y = np.mean(y_center[:,0]) + #mean_w = np.mean(w[:,0]) + #mean_h = np.mean(h[:,0]) + if drt==0: + extend_xmins[0:time_interval[0]] = mean_x-mean_w/2 + extend_ymins[0:time_interval[0]] = mean_y-mean_h/2 + extend_xmaxs[0:time_interval[0]] = mean_x+mean_w/2 + extend_ymaxs[0:time_interval[0]] = mean_y+mean_h/2 + else: + if time_interval[1]bnd_margin and abs(img_size[0]-x0)>bnd_margin and abs(y0)>bnd_margin and abs(img_size[1]-y0)>bnd_margin: + continue + + t_test = np.array(range(t1,t2)) + test_t = np.zeros((len(t_test),1)) + test_t[:,0] = t_test + N_t = t2-t1 + if N_t==0: + continue + A = np.ones((N_t,2)) + A[:,0] = t_test + + tr_t = np.zeros((end_fr-start_fr,1)) + tr_t[:,0] = np.array(range(start_fr,end_fr)) + tr_x = np.zeros((end_fr-start_fr,1)) + tr_x[:,0] = xmins[start_fr:end_fr] + tr_y = np.zeros((end_fr-start_fr,1)) + tr_y[:,0] = ymins[start_fr:end_fr] + tr_w = np.zeros((end_fr-start_fr,1)) + tr_w[:,0] = xmaxs[start_fr:end_fr]-xmins[start_fr:end_fr] + tr_h = np.zeros((end_fr-start_fr,1)) + tr_h[:,0] = ymaxs[start_fr:end_fr]-ymins[start_fr:end_fr] + + test_x = GP_regression(tr_t,tr_x,test_t) + test_y = GP_regression(tr_t,tr_y,test_t) + test_w = GP_regression(tr_t,tr_w,test_t) + test_h = GP_regression(tr_t,tr_h,test_t) + + if drt==0: + + max_idx = np.where(np.logical_or(test_himg_size[0]-1] = img_size[0]-1 + extend_ymins[extend_ymins<1] = 1 + extend_ymins[extend_ymins>img_size[1]-1] = img_size[1]-1 + extend_xmaxs[extend_xmaxs<1] = 1 + extend_xmaxs[extend_xmaxs>img_size[0]-1] = img_size[0]-1 + extend_ymaxs[extend_ymaxs<1] = 1 + extend_ymaxs[extend_ymaxs>img_size[1]-1] = img_size[1]-1 + if len(neg_idx)!=0: + extend_xmins[neg_idx] = -1 + extend_ymins[neg_idx] = -1 + extend_xmaxs[neg_idx] = -1 + extend_ymaxs[neg_idx] = -1 + + + neg_idx = np.where(np.logical_or(extend_ymaxs-extend_yminsimg_size[1]] = img_size[1] + ymax[ymax>img_size[0]] = img_size[0] + new_bbox[:,2] = xmax-new_bbox[:,0]+1 + new_bbox[:,3] = ymax-new_bbox[:,1]+1 + return new_bbox + +def estimateF(img1, img2): + + np.random.seed(0) + + #img1, img2, groundtruth_disp = data.stereo_motorcycle() + + img1_gray, img2_gray = map(rgb2gray, (img1, img2)) + + descriptor_extractor = ORB() + + descriptor_extractor.detect_and_extract(img1_gray) + keypoints_left = descriptor_extractor.keypoints + descriptors_left = descriptor_extractor.descriptors + + descriptor_extractor.detect_and_extract(img2_gray) + 
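+    # skimage's ORB keeps the latest results in .keypoints/.descriptors, so
+    # the second image's outputs are read out before the extractor is reused.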
keypoints_right = descriptor_extractor.keypoints + descriptors_right = descriptor_extractor.descriptors + + matches = match_descriptors(descriptors_left, descriptors_right, + cross_check=True) + + # Estimate the epipolar geometry between the left and right image. + + model, inliers = ransac((keypoints_left[matches[:, 0]], + keypoints_right[matches[:, 1]]), + FundamentalMatrixTransform, min_samples=8, + residual_threshold=1, max_trials=5000) + + inlier_keypoints_left = keypoints_left[matches[inliers, 0]] + inlier_keypoints_right = keypoints_right[matches[inliers, 1]] + + print("Number of matches:", matches.shape[0]) + print("Number of inliers:", inliers.sum()) + + # Visualize the results. + ''' + fig, ax = plt.subplots(nrows=2, ncols=1) + + plt.gray() + + plot_matches(ax[0], img1, img2, keypoints_left, keypoints_right, + matches[inliers], only_matches=True) + ax[0].axis("off") + ax[0].set_title("Inlier correspondences") + + plt.show() + ''' + #import pdb; pdb.set_trace() + + return model, matches.shape[0], inliers.sum(), inlier_keypoints_left, inlier_keypoints_right + +def color_table(num): + digit = '0123456789ABCDEF' + table = [] + for n in range(num): + select_idx = np.random.randint(16, size=6) + for k in range(6): + if k==0: + temp_color = digit[select_idx[k]] + else: + temp_color = temp_color+digit[select_idx[k]] + table.append(temp_color) + return table + +def linear_pred(y): + if len(y)==1: + return y + else: + x = np.array(range(0,len(y))) + slope, intercept, _, _, _ = stats.linregress(x,y) + return slope*len(y)+intercept + +def linear_pred_v2(tr_t, tr_y, ts_t): + ts_y = np.ones(len(ts_t)) + if len(tr_t)==1: + ts_y = ts_y*tr_y + else: + slope, intercept, _, _, _ = stats.linregress(tr_t,tr_y) + ts_y = slope*ts_t+intercept + return ts_y + +def file_name(num, length): + cnt = 1 + temp = num + while 1: + temp = int(temp/10) + if temp>0: + cnt = cnt+1 + else: + break + num_len = cnt + for n in range(length-num_len): + if n==0: + out_str = '0' + else: + out_str = out_str+'0' + if length-num_len>0: + return out_str+str(num) + else: + return str(num) + +#bbox = [x, y, w, h] +def get_IOU(bbox1, bbox2): + area1 = bbox1[2]*bbox1[3] + area2 = bbox2[2]*bbox2[3] + x1 = max(bbox1[0], bbox2[0]) + y1 = max(bbox1[1], bbox2[1]) + x2 = min(bbox1[0]+bbox1[2]-1, bbox2[0]+bbox2[2]-1) + y2 = min(bbox1[1]+bbox1[3]-1, bbox2[1]+bbox2[3]-1) + + #import pdb; pdb.set_trace() + overlap_area = max(0, (x2-x1+1))*max(0, (y2-y1+1)) + ratio = overlap_area/(area1+area2-overlap_area) + return ratio,overlap_area,area1,area2 + +def get_overlap(bbox1, bbox2): + num1 = bbox1.shape[0] + num2 = bbox2.shape[0] + overlap_mat = np.zeros((num1, num2)) + overlap_area = np.zeros((num1, num2)) + area1 = np.zeros(num1) + area2 = np.zeros(num2) + for n in range(num1): + for m in range(num2): + + #import pdb; pdb.set_trace() + overlap_mat[n,m],overlap_area[n,m],area1[n],area2[m] = get_IOU(bbox1[n,:], bbox2[m,:]) + + return overlap_mat,overlap_area,area1,area2 + +def load_detection(file_name, dataset): + + # M=[fr_id (from 1), x, y, w, h, det_score] + if dataset=='Underwater': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 6)) + M[:,0] = f[:,0]+1 + M[:,1:5] = f[:,1:5] + M[:,5] = f[:,5] + M[:,3] = M[:,3]-M[:,1]+1 + M[:,4] = M[:,4]-M[:,2]+1 + return M + if dataset=='UA-Detrac': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 6)) + M[:,0] = f[:,0] + M[:,1:6] = f[:,2:7] + #import pdb; pdb.set_trace() + return M + if dataset=='KITTI': + f = 
np.loadtxt(det_path,delimiter=' ',dtype='str') + mask = np.zeros((len(f),1)) + for n in range(len(f)): + if f[n][2]=='Car' or f[n][2]=='Van': + mask[n,0] = 1 + num = int(np.sum(mask)) + M = np.zeros((num, 6)) + cnt = 0 + for n in range(len(f)): + if mask[n,0]==1: + M[cnt,0] = int(float(f[n][0]))+1 + M[cnt,1] = int(float(f[n][6])) + M[cnt,2] = int(float(f[n][7])) + M[cnt,3] = int(float(f[n][8]))-int(float(f[n][6]))+1 + M[cnt,4] = int(float(f[n][9]))-int(float(f[n][7]))+1 + M[cnt,5] = float(f[n][17]) + cnt = cnt+1 + + #import pdb; pdb.set_trace() + return M + + if dataset=='KITTI_3d': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + mask = np.zeros((len(f),1)) + for n in range(len(f)): + # only for pedestrian + #******************* + if f[n][7]==4 or f[n][7]==5 or f[n][7]==6: + mask[n,0] = 1 + num = int(np.sum(mask)) + + M = np.zeros((num, 10)) + cnt = 0 + for n in range(len(f)): + if mask[n,0]==1: + M[cnt,0] = int(float(f[n][0])) + M[cnt,1] = int(float(f[n][2])) + M[cnt,2] = int(float(f[n][3])) + M[cnt,3] = int(float(f[n][4])) + M[cnt,4] = int(float(f[n][5])) + M[cnt,5] = 1.0 + M[cnt,6] = float(f[n][8]) + M[cnt,7] = float(f[n][9]) + M[cnt,8] = float(f[n][10]) + M[cnt,9] = float(f[n][11]) + cnt = cnt+1 + #import pdb; pdb.set_trace() + return M + + if dataset=='MOT_tr': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 6)) + M[:,0] = f[:,0] + M[:,1:6] = f[:,2:7] + #import pdb; pdb.set_trace() + return M + if dataset=='YOLO': + f = np.loadtxt(file_name, dtype=str, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 6)) + cnt = 0 + for n in range(len(f)): + M[cnt,0] = int(float(f[n][0]))+1 + M[cnt,1] = int(float(f[n][2])) + M[cnt,2] = int(float(f[n][3])) + M[cnt,3] = int(float(f[n][4])) + M[cnt,4] = int(float(f[n][5])) + M[cnt,5] = float(f[n][6])/100.0 + cnt = cnt+1 + return M + if dataset=='MOT_gt': + # fr_id, x, y, w, h, obj_id, class_id + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 7)) + M[:,0] = f[:,0] + M[:,1:5] = f[:,2:6] + M[:,5] = f[:,1] + M[:,6] = f[:,7] + #import pdb; pdb.set_trace() + return M + if dataset=='MOT_1': + # fr_id, x, y, w, h, det_score, svm_score, h_score, y_score, IOU_gt + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 10)) + M[:,0] = f[:,0] + M[:,1:6] = f[:,2:7] + M[:,6:10] = f[:,10:14] + #import pdb; pdb.set_trace() + return M + if dataset=='KITTI_3d_2': + f = np.loadtxt(file_name, dtype=str, delimiter=',') + f = np.array(f) + mask = np.zeros((len(f),1)) + for n in range(len(f)): + # only for pedestrian + if f[n][11]=="Pedestrian" or f[n][11]=="Cyclist": + mask[n,0] = 1 + num = int(np.sum(mask)) + + M = np.zeros((num, 10)) + cnt = 0 + for n in range(len(f)): + if mask[n,0]==1: + M[cnt,0] = int(float(f[n][0])) + M[cnt,1] = int(float(f[n][1])) + M[cnt,2] = int(float(f[n][2])) + M[cnt,3] = int(float(f[n][3])) + M[cnt,4] = int(float(f[n][4])) + M[cnt,5] = float(f[n][10])/100.0 + M[cnt,6] = float(f[n][5]) + M[cnt,7] = float(f[n][7]) + M[cnt,8] = float(f[n][8]) + M[cnt,9] = float(f[n][9]) + cnt = cnt+1 + #import pdb; pdb.set_trace() + return M + if dataset=='chongqing': + f = np.loadtxt(file_name, dtype=str, delimiter=',') + f = np.array(f) + num = len(f) + M = np.zeros((num, 10)) + cnt = 0 + for n in range(len(f)): + M[cnt,0] = int(float(f[n][0])) + M[cnt,1] = int(float(f[n][2])) + M[cnt,2] = int(float(f[n][3])) + M[cnt,3] = int(float(f[n][4])) + M[cnt,4] = int(float(f[n][5])) + M[cnt,5] = float(f[n][6])/100 + M[cnt,6] = 
float(f[n][2]) + M[cnt,7] = float(f[n][3]) + M[cnt,8] = float(f[n][4]) + M[cnt,9] = float(f[n][5]) + cnt = cnt+1 + return M + +def bbox_associate(overlap_mat, IOU_thresh): + idx1 = [] + idx2 = [] + new_overlap_mat = overlap_mat.copy() + while 1: + idx = np.unravel_index(np.argmax(new_overlap_mat, axis=None), new_overlap_mat.shape) + if new_overlap_mat[idx]IOU_thresh: + if s1>s2: + cand_idx[n2] = 0 + else: + cand_idx[n1] = 0 + if merge_mode==1: + if r1>IOU_thresh or r2>IOU_thresh: + if s1>s2: + cand_idx[n2] = 0 + else: + cand_idx[n1] = 0 + idx = np.where(cand_idx==1)[0] + new_bbox = bbox[idx,:] + return idx, new_bbox + +def estimate_h_y(hloc, yloc): + # h + A = np.ones((hloc.shape[0],2)) + A[:,0] = yloc + iters = 10 + W = np.identity(hloc.shape[0]) + for k in range(iters): + A_w = np.matmul(W,A) + b_w = np.matmul(W,hloc) + ph = np.matmul(np.linalg.pinv(A_w),b_w) + y_err = np.matmul(A,ph)-hloc + err_std = np.std(y_err) + w = np.exp(-np.power(y_err,2)/err_std*err_std) + W = np.diag(w) + + # y + A = np.ones((hloc.shape[0],2)) + A[:,0] = hloc + iters = 10 + W = np.identity(hloc.shape[0]) + for k in range(iters): + A_w = np.matmul(W,A) + b_w = np.matmul(W,yloc) + py = np.matmul(np.linalg.pinv(A_w),b_w) + y_err = np.matmul(A,py)-yloc + err_std = np.std(y_err) + w = np.exp(-np.power(y_err,2)/err_std*err_std) + W = np.diag(w) + return ph, py + +def extract_tracklet_feature(tracklet_mat, k, idx): + tracklet_fea = np.zeros(17) + tracklet_fea[0] = len(idx) + tracklet_fea[1] = np.min(tracklet_mat['det_score_mat'][k,idx]) + tracklet_fea[2] = np.max(tracklet_mat['det_score_mat'][k,idx]) + tracklet_fea[3] = np.mean(tracklet_mat['det_score_mat'][k,idx]) + tracklet_fea[4] = np.std(tracklet_mat['det_score_mat'][k,idx]) + tracklet_fea[5] = np.min(tracklet_mat['svm_score_mat'][k,idx]) + tracklet_fea[6] = np.max(tracklet_mat['svm_score_mat'][k,idx]) + tracklet_fea[7] = np.mean(tracklet_mat['svm_score_mat'][k,idx]) + tracklet_fea[8] = np.std(tracklet_mat['svm_score_mat'][k,idx]) + tracklet_fea[9] = np.min(tracklet_mat['h_score_mat'][k,idx]) + tracklet_fea[10] = np.max(tracklet_mat['h_score_mat'][k,idx]) + tracklet_fea[11] = np.mean(tracklet_mat['h_score_mat'][k,idx]) + tracklet_fea[12] = np.std(tracklet_mat['h_score_mat'][k,idx]) + tracklet_fea[13] = np.min(tracklet_mat['y_score_mat'][k,idx]) + tracklet_fea[14] = np.max(tracklet_mat['y_score_mat'][k,idx]) + tracklet_fea[15] = np.mean(tracklet_mat['y_score_mat'][k,idx]) + tracklet_fea[16] = np.std(tracklet_mat['y_score_mat'][k,idx]) + return tracklet_fea diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_classifier_train.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_classifier_train.py new file mode 100644 index 0000000000000000000000000000000000000000..0188c85645e049f6c8557f7d716c7ec4954247fc --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_classifier_train.py @@ -0,0 +1,413 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Copyright ©2019 Gaoang Wang. All rights reserved. Permission is
+# hereby granted for academic use. No other use, copying, distribution, or modification
+# is permitted without prior written consent. Copyrights for
+# third-party components of this work must be honored. Instructors
+# interested in reusing these course materials should contact the
+# author.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+import facenet
+import lfw
+import os
+import sys
+import cv2
+import pickle
+import time
+from functools import wraps
+
+from tensorflow.python.ops import data_flow_ops
+from sklearn import metrics
+from scipy.optimize import brentq
+from scipy import interpolate
+from scipy.interpolate import interp1d
+from scipy.io import loadmat
+from scipy import misc
+from scipy import stats
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from PIL import Image
+from sklearn import svm
+from sklearn.externals import joblib
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import make_classification
+import track_lib
+
+train_seqs = ['MOT17-02-FRCNN','MOT17-09-FRCNN','MOT17-10-FRCNN','MOT17-11-FRCNN','MOT17-13-FRCNN']
+            #'MOT17-02-DPM','MOT17-04-DPM','MOT17-05-DPM','MOT17-09-DPM','MOT17-10-DPM','MOT17-11-DPM','MOT17-13-DPM',
+            #['MOT17-02-SDP','MOT17-04-SDP','MOT17-05-SDP','MOT17-09-SDP','MOT17-10-SDP','MOT17-11-SDP','MOT17-13-SDP']
+
+det_path = 'D:/Data/MOT/MOT17Labels/train'
+gt_path = 'D:/Data/MOT/MOT17Labels/train'
+cnn_fea_path = 'D:/Data/MOT/MOT17_train_det_crop'
+save_cnn_svm_path = 'D:/Data/MOT/MOT17_train_det_crop/cnn_svm_MOT17.pkl'
+save_det_path = 'D:/Data/MOT/MOT17Labels/train'
+track_struct_path = 'D:/Data/MOT/track_struct'
+save_classifier_path = 'D:/Data/MOT/MOT17_train_det_crop/rand_forest_MOT17_FRCNN.pkl'
+F_set_path = 'D:/Data/MOT/geometry_info'
+img_folder = 'D:/Data/MOT/MOT17Det/train'
+
+fea_size = 512
+img_size = [[1920,1080],[1920,1080],[1920,1080],[1920,1080],[1920,1080]]
+           #[1920,1080],[1920,1080],[640,480],[1920,1080],[1920,1080],[1920,1080],[1920,1080],
+           #[1920,1080],[1920,1080],[640,480],[1920,1080],[1920,1080],[1920,1080],[1920,1080]]
+F_use_flag = [0,0,1,1,1]
+
+def proj_bbox(M, F_set, max_fr_dist, img_size, img_list):
+    max_rows = 1000000
+    ext_M = np.zeros((max_rows,5))
+    ext_M[0:len(M),:] = M[:,0:5]
+    cnt = len(M)
+    max_fr = int(np.max(M[:,0]))
+
+    for n in range(len(M)):
+        bbox = np.zeros((1,4))
+        bbox[0,:] = M[n,1:5]
+        fr_idx = int(M[n,0])
+
+        # forward: extend the box frame by frame with the fundamental matrices
+        prev_bbox = bbox.copy()
+        for k in range(fr_idx, min(max_fr,fr_idx+max_fr_dist)):
+            #import pdb; pdb.set_trace()
+            pred_bbox = track_lib.pred_bbox_by_F(prev_bbox, F_set[:,:,k-1], 0, [], [])
+            check_flag = track_lib.check_bbox_near_img_bnd(pred_bbox, img_size, 10)
+            if check_flag==1:
+                break
+
+            ext_M[cnt,0] = k+1   # the prediction belongs to frame k+1
+            ext_M[cnt,1:] = pred_bbox.copy()
+            prev_bbox = pred_bbox.copy()
+            cnt = cnt+1
+
+        # backward
+        prev_bbox = bbox.copy()
+        if fr_idx==1:
+            continue
+        for k in range(fr_idx,max(1,fr_idx-max_fr_dist),-1):
+            #img1 = misc.imread(img_list[k])
+            #img2 = misc.imread(img_list[k-1])
+            pred_bbox = track_lib.pred_bbox_by_F(prev_bbox, np.transpose(F_set[:,:,k-2]), 0, [], [])
+            check_flag = track_lib.check_bbox_near_img_bnd(pred_bbox, img_size, 10)
+            if check_flag==1:
+                break
+
+            ext_M[cnt,0] = k-1   # the prediction belongs to frame k-1
+            ext_M[cnt,1:] = pred_bbox.copy()
+            prev_bbox = pred_bbox.copy()
+            cnt = cnt+1
+
+    # np.delete returns a copy; keep only the rows that were actually filled
+    remove_range = range(cnt,max_rows)
+    ext_M = np.delete(ext_M, np.array(remove_range), axis=0)
+    return ext_M
+
+def estimate_GP(bbox,err_sigma):
+    # ground-plane fit, h = ax+by+c, via iteratively reweighted least squares
+    N_pt = 2*len(bbox)
+    A = np.zeros((N_pt,3))
+    b = np.zeros((N_pt,1))
+    w = np.ones(N_pt)/N_pt
+
+    for n in range(0,len(bbox)):
+        xmin = bbox[n,0]
+        xmax = bbox[n,0]+bbox[n,2]
+        ymax = bbox[n,1]+bbox[n,3]
+        h = bbox[n,3]
+        A[2*n,0] = xmin
+        A[2*n,1] = ymax
+        A[2*n,2] = 1
+        b[2*n,0] = h
+        A[2*n+1,0] = xmax
+        A[2*n+1,1] = ymax
+        A[2*n+1,2] = 1
+        b[2*n+1,0] = h
+
+    iters = 20
+    for k in range(iters):
+        W = np.diag(w)
+        p = np.matmul(np.linalg.pinv(np.matmul(W,A)),np.matmul(W,b))
+        err_ratio = np.absolute(np.matmul(A,p)-b)/np.absolute(np.matmul(A,p))
+        w = np.exp(-np.power(err_ratio[:,0],2)/np.power(err_sigma,2))
+        # the two corner equations of one box share the same weight
+        ww = (w[::2]+w[1::2])/2
+        w[::2] = ww
+        w[1::2] = ww
+        w = w/np.sum(w)
+        #import pdb; pdb.set_trace()
+    return p
+
+def h_err_pred(p,bbox,err_sigma):
+    x_center = bbox[:,0]+bbox[:,2]/2   # box center: xmin + w/2
+    ymax = bbox[:,1]+bbox[:,3]
+    h = bbox[:,3]
+    A = np.ones((len(bbox),3))
+    A[:,0] = x_center
+    A[:,1] = ymax
+    h_pred = np.matmul(A,p)
+    err_ratio = np.absolute(h_pred[:,0]-h)/np.absolute(h_pred[:,0])
+    err_ratio[h_pred[:,0]==0] = 0
+    #import pdb; pdb.set_trace()
+    '''
+    for n in range(len(h_pred)):
+        if h_pred[n,0]==0:
+            import pdb; pdb.set_trace()
+    '''
+    return err_ratio
+
+def extract_classifier_features():
+    tr_M = []
+    gt_M = []
+    cnn_fea_mat = []
+    cnn_label = []
+    num_det = []
+    #loc_score_h = []
+    #loc_score_y = []
+    err_ratio_h = []
+    det_IOU = []
+
+    bnd_thresh = 10
+    max_fr_dist = 10
+    err_sigma = 1
+    for n in range(len(train_seqs)):
+        print(n)
+        det_file_path = det_path+'/'+train_seqs[n]+'/det/det.txt'
+        temp_det = track_lib.load_detection(det_file_path, 'MOT_tr')
+        tr_M.append(temp_det)
+        gt_file_path = gt_path+'/'+train_seqs[n]+'/gt/gt.txt'
+        temp_gt = track_lib.load_detection(gt_file_path, 'MOT_gt')
+        gt_M.append(temp_gt)
+        cnn_fea_file_path = cnn_fea_path+'/'+train_seqs[n]+'.csv'
+        f = np.loadtxt(cnn_fea_file_path, delimiter=',')
+        f = np.array(f)
+        cnn_fea_mat.append(f)
+        num_det.append(f.shape[0])
+
+        img_list = []
+        for kk in range(int(np.max(temp_gt[:,0]))):
+            img_path = img_folder+'/'+train_seqs[n][0:8]+'/img1/'+track_lib.file_name(kk+1,6)+'.jpg'
+            img_list.append(img_path)
+
+        if F_use_flag[n]==1:
+            F_set_file_path = F_set_path+'/'+train_seqs[n][0:8]+'_F_set.mat'
+            F_set = loadmat(F_set_file_path)
+            F_set = F_set['F_set']
+
+        loc_train_idx = np.zeros((f.shape[0],1),dtype=int)
+        temp_label = np.zeros((f.shape[0],1))
+        temp_IOU = np.zeros((f.shape[0],1))
+        for k in range(temp_det.shape[0]):
+            temp_det_bbox = np.zeros((1,4))
+            temp_det_bbox[0,:] = temp_det[k,1:5]
+            #import pdb; pdb.set_trace()
+            if abs(temp_det_bbox[0,0])>bnd_thresh and abs(temp_det_bbox[0,1])>bnd_thresh and \
+               abs(temp_det_bbox[0,0]+temp_det_bbox[0,2]-img_size[n][0])>bnd_thresh and \
+
abs(temp_det_bbox[0,1]+temp_det_bbox[0,3]-img_size[n][1])>bnd_thresh: + loc_train_idx[k,0] = 1 + + choose_idx1 = list(np.where(np.logical_and(temp_gt[:,0]==temp_det[k,0],temp_gt[:,6]==1))[0]) + choose_idx2 = list(np.where(np.logical_and(temp_gt[:,0]==temp_det[k,0],temp_gt[:,6]==2))[0]) + choose_idx3 = list(np.where(np.logical_and(temp_gt[:,0]==temp_det[k,0],temp_gt[:,6]==7))[0]) + choose_idx = [] + choose_idx.extend(choose_idx1) + choose_idx.extend(choose_idx2) + choose_idx.extend(choose_idx3) + choose_idx = np.array(choose_idx,dtype=int) + choose_idx = np.unique(choose_idx) + + if len(choose_idx)==0: + temp_label[k] = 0 + continue + temp_gt_bbox = temp_gt[choose_idx,1:5] + xmax = temp_gt_bbox[:,0].copy()+temp_gt_bbox[:,2].copy() + ymax = temp_gt_bbox[:,1].copy()+temp_gt_bbox[:,3].copy() + xmin = temp_gt_bbox[:,0].copy() + xmin[xmin<0] = 0 + ymin = temp_gt_bbox[:,1].copy() + ymin[ymin<0] = 0 + w = xmax-xmin + h = ymax-ymin + temp_gt_bbox[:,0] = xmin + temp_gt_bbox[:,1] = ymin + temp_gt_bbox[:,2] = w + temp_gt_bbox[:,3] = h + #import pdb; pdb.set_trace() + overlap_mat = track_lib.get_overlap(temp_det_bbox, temp_gt_bbox) + temp_IOU[k,0] = np.max(overlap_mat) + if np.max(overlap_mat)>0.5: + temp_label[k] = 1 + else: + temp_label[k] = 0 + cnn_label.append(temp_label) + det_IOU.append(temp_IOU) + + # train location + M = temp_det[loc_train_idx[:,0]==1,:] + err_ratio = np.zeros((len(temp_det),1)) + if F_use_flag[n]==1: + ext_M = proj_bbox(M, F_set, max_fr_dist, img_size[n], img_list) + for t in range(int(np.min(M[:,0])), int(np.max(M[:,0]))+1): + temp_bbox = ext_M[ext_M[:,0]==t,1:5] + GP_p = estimate_GP(temp_bbox,err_sigma) + temp_det_bbox = temp_det[temp_det[:,0]==t,1:5] + #import pdb; pdb.set_trace() + err_ratio[temp_det[:,0]==t,0] = h_err_pred(GP_p,temp_det_bbox,err_sigma) + else: + GP_p = estimate_GP(M[:,1:5],err_sigma) + #import pdb; pdb.set_trace() + err_ratio[:,0] = h_err_pred(GP_p,temp_det[:,1:5],err_sigma) + err_ratio_h.append(err_ratio) + + ''' + # train location + # h + yloc = temp_det[loc_train_idx[:,0]==1,2]+temp_det[loc_train_idx[:,0]==1,4] + hloc = temp_det[loc_train_idx[:,0]==1,4] + ph, py = track_lib.estimate_h_y(hloc, yloc) + + A = np.ones((temp_label.shape[0],2)) + A[:,0] = temp_det[:,2]+temp_det[:,4] + y_err = (np.matmul(A,ph)-temp_det[:,4])/temp_det[:,4] + err_std = np.std(y_err) + w = np.zeros((y_err.shape[0],1)) + w[:,0] = np.exp(-np.power(y_err,2)/(err_std*err_std)) + #import pdb; pdb.set_trace() + loc_score_h.append(w) + + + # y + A = np.ones((temp_label.shape[0],2)) + A[:,0] = temp_det[:,4] + y_err = np.matmul(A,py)-(temp_det[:,2]+temp_det[:,4]) + err_std = np.std(y_err) + w = np.zeros((y_err.shape[0],1)) + w[:,0] = np.exp(-np.power(y_err,2)/(err_std*err_std)) + loc_score_y.append(w) + ''' + + total_num_det = int(np.sum(np.array(num_det))) + tr_fea_mat = np.zeros((total_num_det,fea_size)) + tr_label = np.zeros((total_num_det,1)) + cnt = 0 + for n in range(len(train_seqs)): + tr_fea_mat[cnt:cnt+num_det[n],:] = cnn_fea_mat[n] + tr_label[cnt:cnt+num_det[n],0] = cnn_label[n][:,0] + cnt = cnt+num_det[n] + + # train svm for cnn features + #import pdb; pdb.set_trace() + clf = svm.LinearSVC() + clf.fit(tr_fea_mat, tr_label[:,0]) + pred_label = clf.predict(tr_fea_mat) + err = np.sum(np.absolute(pred_label-tr_label[:,0]))/tr_label.shape[0] + #print(err) + #import pdb; pdb.set_trace() + pred_s = np.zeros((pred_label.shape[0],1)) + pred_s[:,0] = clf.decision_function(tr_fea_mat) + joblib.dump(clf, save_cnn_svm_path) + + # save det to file + cnt = 0 + for n in 
range(len(train_seqs)): + det_file_path = det_path+'/'+train_seqs[n]+'/det/det.txt' + f = np.loadtxt(det_file_path, delimiter=',') + f = np.array(f) + #import pdb; pdb.set_trace() + f = np.concatenate([f[:,0:10],pred_s[cnt:cnt+f.shape[0],:]],axis=1) + #f = np.concatenate([f,loc_score_h[n]],axis=1) + #f = np.concatenate([f,loc_score_y[n]],axis=1) + f = np.concatenate([f,err_ratio_h[n]],axis=1) + f = np.concatenate([f,det_IOU[n]],axis=1) + #import pdb; pdb.set_trace() + np.savetxt(det_file_path, f, delimiter=',') + cnt = cnt+f.shape[0] + return + +def train_classifier(): + track_struct = [] + num_tracklet = [] + for n in range(len(train_seqs)): + track_struct_file_path = track_struct_path+'/'+train_seqs[n]+'.obj' + temp_track_struct = pickle.load(open(track_struct_file_path,'rb')) + num_tracklet.append(temp_track_struct['tracklet_mat']['xmin_mat'].shape[0]) + track_struct.append(temp_track_struct) + total_num_tracklet = np.sum(np.array(num_tracklet)) + tracklet_fea = np.zeros((total_num_tracklet,17)) + tracklet_label = np.zeros((total_num_tracklet,1)) + cnt = 0 + + # t_duration, det_score, svm_score, h_score, y_score + for n in range(len(train_seqs)): + print(n) + for k in range(num_tracklet[n]): + #if n==2: + # import pdb; pdb.set_trace() + idx = np.where(track_struct[n]['tracklet_mat']['xmin_mat'][k,:]!=-1)[0] + tracklet_fea[cnt,:] = track_lib.extract_tracklet_feature(track_struct[n]['tracklet_mat'], k, idx) + mean_IOU = np.mean(track_struct[n]['tracklet_mat']['IOU_gt_mat'][k,idx]) + if mean_IOU>0.5: + tracklet_label[cnt,0] = 1 + cnt = cnt+1 + + # train random forest + iters = 1 + train_num = int(total_num_tracklet) + avg_err = np.zeros(iters) + for n in range(iters): + clf = RandomForestClassifier(n_estimators=200, max_depth=30, random_state=0) + #clf = svm.SVC() + + shuffle_idx = np.random.permutation(total_num_tracklet) + train_fea = tracklet_fea[shuffle_idx[0:train_num],:] + train_label = tracklet_label[shuffle_idx[0:train_num],0] + clf.fit(train_fea, train_label) + + test_fea = tracklet_fea[shuffle_idx,:] + test_label = tracklet_label[shuffle_idx,0] + pred_label = clf.predict(test_fea) + avg_err[n] = np.sum(np.absolute(pred_label-test_label))/test_label.shape[0] + print(np.mean(avg_err)) + + + joblib.dump(clf, save_classifier_path) + + ''' + # train svm + clf = svm.SVC() + clf.fit(tracklet_fea, tracklet_label[:,0]) + pred_label = clf.predict(tracklet_fea) + err = np.sum(np.absolute(pred_label-tracklet_label[:,0]))/tracklet_label.shape[0] + print(err) + ''' diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_utils_2d_online.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_utils_2d_online.py new file mode 100644 index 0000000000000000000000000000000000000000..e186c2b0b38dc94ea807ff177c8bda0763378b4d --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_utils_2d_online.py @@ -0,0 +1,2087 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Copyright ©2019 Gaoang Wang. All rights reserved. Permission is
+# hereby granted for academic use. No other use, copying, distribution, or modification
+# is permitted without prior written consent. Copyrights for
+# third-party components of this work must be honored. Instructors
+# interested in reusing these course materials should contact the
+# author.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+import facenet
+import lfw
+import os
+import sys
+import cv2
+import pickle
+import time
+from functools import wraps
+
+from tensorflow.python.ops import data_flow_ops
+from sklearn import metrics
+from scipy.optimize import brentq
+from scipy import interpolate
+from scipy.interpolate import interp1d
+from scipy.io import loadmat
+from scipy import misc
+from scipy import stats
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from PIL import Image
+import shutil
+import seq_nn_3d_v2
+import track_lib
+
+global remove_set
+global track_set
+remove_set = []
+track_set = []
+
+def convert_frames_to_video(pathIn,pathOut,fps):
+    frame_array = []
+    files = [f for f in os.listdir(pathIn) if os.path.isfile(os.path.join(pathIn, f))]
+
+    # for sorting the file names properly
+    files.sort(key = lambda x: int(x[5:-4]))
+
+    for i in range(len(files)):
+        filename=pathIn + files[i]
+        # reading each file
+        img = cv2.imread(filename)
+        height, width, layers = img.shape
+
+        if i==0:
+            size = (width,height)
+        img = cv2.resize(img,size)
+        #print(filename)
+        # inserting the frames into an image array
+        frame_array.append(img)
+
+    out = cv2.VideoWriter(pathOut,cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
+
+    for i in range(len(frame_array)):
+        # writing to an image array
+        out.write(frame_array[i])
+    out.release()
+
+
+def wrt_missing_det(save_mat):
+    # fr_id, track_id, xmin, ymin, xmax, ymax, x, y, w, h, det_score
+    global track_struct
+    for n in range(len(save_mat)):
+        fr_id = int(save_mat[n,0])
+        obj_id = int(save_mat[n,1])
+        if track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,0]==-1:
+            continue
+
+        num_miss_fr = int(fr_id-track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,0]-1)
+        if num_miss_fr<=0:
+            continue
+
+        # linearly interpolate every box coordinate across the missed frames
+        temp_save_mat = np.zeros((num_miss_fr,12))
+        fr_range = np.array(range(int(track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,0])+1,fr_id))
+        interp_xmin = np.interp(fr_range, [fr_range[0]-1, fr_id],
+                                [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,2],save_mat[n,3]])
+        interp_ymin = np.interp(fr_range, [fr_range[0]-1, fr_id],
+                                [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,3],save_mat[n,4]])
+        interp_xmax = np.interp(fr_range, [fr_range[0]-1, fr_id],
+                                [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,4],save_mat[n,5]])
+
interp_ymax = np.interp(fr_range, [fr_range[0]-1, fr_id], + [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,5],save_mat[n,6]]) + interp_x_3d = np.interp(fr_range, [fr_range[0]-1, fr_id], + [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,6],save_mat[n,7]]) + interp_y_3d = np.interp(fr_range, [fr_range[0]-1, fr_id], + [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,7],save_mat[n,8]]) + interp_w_3d = np.interp(fr_range, [fr_range[0]-1, fr_id], + [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,8],save_mat[n,9]]) + interp_h_3d = np.interp(fr_range, [fr_range[0]-1, fr_id], + [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,9],save_mat[n,10]]) + #interp_class_name = np.interp(fr_range, [fr_range[0]-1, fr_id], + # [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,11],save_mat[n,12]]) + #interp_dist2cam = np.interp(fr_range, [fr_range[0]-1, fr_id], + # [track_struct['tracklet_mat']['obj_end_fr_info'][obj_id,12],save_mat[n,13]]) + temp_save_mat[:,0] = fr_range + temp_save_mat[:,1] = obj_id + temp_save_mat[:,2] = -1 + temp_save_mat[:,3] = interp_xmin + temp_save_mat[:,4] = interp_ymin + temp_save_mat[:,5] = interp_xmax + temp_save_mat[:,6] = interp_ymax + temp_save_mat[:,7] = interp_x_3d + temp_save_mat[:,8] = interp_y_3d + temp_save_mat[:,9] = interp_w_3d + temp_save_mat[:,10] = interp_h_3d + temp_save_mat[:,11] = -1 + #temp_save_mat[:,12] = interp_class_name + #temp_save_mat[:,13] = interp_dist2cam + + f = open(track_struct['file_path']['txt_result_path'], 'a') + np.savetxt(f, temp_save_mat, delimiter=',') + f.close() + + # update track_struct['tracklet_mat']['obj_end_fr_info'] + track_struct['tracklet_mat']['obj_end_fr_info'][save_mat[:,1].astype(int),0] = save_mat[:,0] + track_struct['tracklet_mat']['obj_end_fr_info'][save_mat[:,1].astype(int),1:] = save_mat[:,2:] + return + +def draw_result(img, save_mat, fr_id): + + global track_struct + save_folder = track_struct['file_path']['tracking_img_folder'] + table = track_struct['tracklet_mat']['color_table'] + save_path = save_folder+'/'+track_lib.file_name(fr_id,10)+'.jpg' + + # Create figure and axes + fig,ax = plt.subplots(1) + + # Display the image + ax.imshow(img) + + # Create Rectangle patches + # save_mat = [fr_id, obj_id, track_id, x, y, w, h, x_3d, y_3d, w_3d, h_3d, det_score] + for k in range(len(save_mat)): + obj_id = int(save_mat[k,1]) + tracklet_id = int(save_mat[k,2]) + xmin = int(save_mat[k,3]) + ymin = int(save_mat[k,4]) + w = int(save_mat[k,5]) + h = int(save_mat[k,6]) + rect = patches.Rectangle((xmin,ymin),w,h,linewidth=1,edgecolor='#'+table[obj_id], facecolor='none') + img_text = plt.text(xmin,ymin,str(obj_id)+'_'+str(tracklet_id),fontsize=6,color='#'+table[obj_id]) + + # Add the patch to the Axes + ax.add_patch(rect) + + if not os.path.isdir(save_folder): + os.makedirs(save_folder) + + plt.savefig(save_path,bbox_inches='tight',dpi=400) + plt.clf() + plt.close('all') + return + +def post_processing(debug_mode): + + global track_struct + #import pdb; pdb.set_trace() + + # update comb_cost + cand_track_idx = np.where(track_struct['tracklet_mat']['track_id_mat']!=-1)[0] + for n in range(len(cand_track_idx)): + track_struct['tracklet_mat']['comb_track_cost'][cand_track_idx[n],cand_track_idx] \ + = track_struct['sub_tracklet_mat']['comb_track_cost'][n,:].copy() + track_struct['tracklet_mat']['comb_track_cost_mask'][cand_track_idx[n],cand_track_idx] \ + = track_struct['sub_tracklet_mat']['comb_track_cost_mask'][n,:].copy() + + # + tracklet_mat = track_struct['sub_tracklet_mat'] + track_params 
= track_struct['track_params'] + new_tracklet_mat = tracklet_mat.copy() + #import pdb; pdb.set_trace() + + # update track cluster + N_cluster = len(tracklet_mat["track_cluster"]) + new_assigned_id_mask = track_struct['tracklet_mat']['save_obj_id_mask'].copy() + avai_ids = np.where(track_struct['tracklet_mat']['assigned_obj_id_mask']==0)[0] + + check_save_idx = list(np.where(new_assigned_id_mask==1)[0]) + check_assigned_idx = list(np.where(avai_ids==1)[0]) + + new_cnt = -1 + for n in range(N_cluster): + if len(tracklet_mat["track_cluster"][n])==0: + continue + + #if debug_mode==1: + # import pdb; pdb.set_trace() + + finish_check_idx = 0 + + # check save_obj_id_mask + obj_ids = tracklet_mat['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] + obj_mask = track_struct['tracklet_mat']['save_obj_id_mask'][obj_ids] + save_idx = np.where(obj_mask==1)[0] + + for k in range(len(tracklet_mat["track_cluster"][n])): + temp_id = tracklet_mat['obj_id_mat'][tracklet_mat["track_cluster"][n][k]] + if temp_id in check_save_idx: + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = temp_id + finish_check_idx = 1 + check_save_idx.remove(temp_id) + if temp_id in check_assigned_idx: + check_assigned_idx.remove(temp_id) + break + + if finish_check_idx==1: + continue + + # check assigned_obj_id_mask + for k in range(len(tracklet_mat["track_cluster"][n])): + temp_id = tracklet_mat['obj_id_mat'][tracklet_mat["track_cluster"][n][k]] + if temp_id in check_assigned_idx: + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = temp_id + finish_check_idx = 1 + check_assigned_idx.remove(temp_id) + break + + if finish_check_idx==1: + continue + + new_cnt = new_cnt+1 + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = avai_ids[new_cnt] + + ''' + # check save_obj_id_mask + obj_ids = tracklet_mat['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] + obj_mask = track_struct['tracklet_mat']['save_obj_id_mask'][obj_ids] + save_idx = np.where(obj_mask==1)[0] + if len(save_idx)>0: + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = obj_ids[save_idx[0]] + continue + + # check assigned_obj_id_mask + obj_mask = track_struct['tracklet_mat']['assigned_obj_id_mask'][obj_ids] + assigned_idx = np.where(obj_mask==1)[0] + if len(assigned_idx)==0: + new_cnt = new_cnt+1 + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = avai_ids[new_cnt] + else: + check_flag = 0 + for k in range(len(assigned_idx)): + temp_obj_id = obj_ids[assigned_idx[k]] + if new_assigned_id_mask[temp_obj_id]==1: + continue + else: + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] \ + = temp_obj_id + check_flag = 1 + new_assigned_id_mask[temp_obj_id] = 1 + break + if check_flag==0: + new_cnt = new_cnt+1 + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = avai_ids[new_cnt] + ''' + # copy to tracklet_mat + #import pdb; pdb.set_trace() + cand_track_idx = np.where(track_struct['tracklet_mat']['track_id_mat']!=-1)[0] + track_struct['tracklet_mat']['obj_id_mat'][cand_track_idx] = track_struct['sub_tracklet_mat']['obj_id_mat'].copy() + + return + +def comb_cost(tracklet_set, sess): + + global track_struct + #global all_fea_mat + #global all_fea_label + + img_size = 
track_struct['track_params']['img_size'] + feature_size = track_struct['track_params']['feature_size'] + max_length = track_struct['track_params']['max_length'] + + tracklet_mat = track_struct['sub_tracklet_mat'] + loc_scales = track_struct['track_params']['loc_scales'] + + ''' + temp_sum = np.sum(all_fea_mat[:,4,:,1], axis=1) + if len(np.where(temp_sum!=0)[0])==0: + fea_id = 0 + else: + fea_id = int(np.max(np.where(temp_sum!=0)[0]))+1 + ''' + + # cnn classifier + N_tracklet = len(tracklet_set) + track_interval = tracklet_mat['track_interval'] + sort_idx = np.argsort(track_interval[np.array(tracklet_set),1]) + cost = 0 + if len(sort_idx)<=1: + return cost + + remove_ids = [] + + comb_fea_mat = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),feature_size,max_length,3)) + comb_fea_label = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),4)) + + temp_cost_list = [] + #print(len(comb_track_cost)) + cnt = -1 + for n in range(0, len(sort_idx)-1): + for kk in range(n+1,len(sort_idx)): + cnt = int(cnt+1) + track_id1 = tracklet_set[sort_idx[n]] + track_id2 = tracklet_set[sort_idx[kk]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + remove_ids.append(cnt) + continue + + #import pdb; pdb.set_trace() + if tracklet_mat['comb_track_cost_mask'][track_id1,track_id2]==1: + cost = cost+tracklet_mat['comb_track_cost'][track_id1,track_id2] + remove_ids.append(cnt) + continue + + comb_fea_label[cnt,0] = track_id1 + comb_fea_label[cnt,1] = track_id2 + + temp_cost_list.append([track_id1,track_id2]) + + + # t starts from 0 + #import pdb; pdb.set_trace() + t1_min = int(track_interval[track_id1,0]) + t1_max = int(track_interval[track_id1,1]) + t2_min = int(track_interval[track_id2,0]) + t2_max = int(track_interval[track_id2,1]) + t_min = int(min(t1_min,t2_min)) + t_max = int(max(t1_max,t2_max)) + + if t_max-t_min+1<=max_length: + comb_fea_mat[cnt,:,t1_min-t_min:t1_max-t_min+1,1] = 1 + comb_fea_mat[cnt,0,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['x_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['y_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['w_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['h_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1)[0] + if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + + if comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + comb_fea_mat[cnt,:,t2_min-t_min:t2_max-t_min+1,2] = 1 + + comb_fea_mat[cnt,0,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['x_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['y_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['w_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['h_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2)[0] 
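+                    # order track_id2's appearance features by frame before
+                    # copying them into rows 4: of channel 0 of the pair tensor
+                    # (channels 1 and 2 are the binary time masks of the two
+                    # tracklets)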
+ if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + + if comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + + comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + else: + t_len1 = t1_max-t1_min+1 + t_len2 = t2_max-t2_min+1 + t_len_min = min(t_len1,t_len2) + mid_t = int(0.5*(t1_max+t2_min)) + if mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1<=0.5*max_length: + t2_end = t2_max + t1_start = t2_end-max_length+1 + #t1_start = mid_t-int(0.5*max_length)+1 + #t2_end = t1_start+max_length-1 + elif mid_t-t1_min+1<=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = t1_min + t2_end = t1_start+max_length-1 + else: # mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = mid_t-int(0.5*max_length)+1 + t2_end = t1_start+max_length-1 + + comb_fea_mat[cnt,:,0:t1_max-t1_start+1,1] = 1 + if comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmax_mat'][track_id1,t1_start:t1_max+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0] = tracklet_mat['x_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,0:t1_max-t1_start+1,0] = tracklet_mat['y_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,0:t1_max-t1_start+1,0] = tracklet_mat['w_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,0:t1_max-t1_start+1,0] = tracklet_mat['h_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1)[0] + if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + + cand_idx = cand_idx[t1_start-t1_min:] + comb_fea_mat[cnt,4:,0:t1_max-t1_start+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + comb_fea_mat[cnt,:,t2_min-t1_start:t2_end-t1_start+1,2] = 1 + if comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmin_mat'][track_id2,t2_min:t2_end+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['x_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[0] + comb_fea_mat[cnt,1,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['y_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[1] + comb_fea_mat[cnt,2,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['w_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[2] + comb_fea_mat[cnt,3,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['h_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2)[0] + if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + #import pdb; pdb.set_trace() + cand_idx = cand_idx[0:t2_end-t2_min+1] + comb_fea_mat[cnt,4:,t2_min-t1_start:t2_end-t1_start+1,0] \ + = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + # remove overlap detections + t_overlap = np.where(comb_fea_mat[cnt,0,:,1]+comb_fea_mat[cnt,0,:,2]>1) + if len(t_overlap)>0: + t_overlap = t_overlap[0] + comb_fea_mat[cnt,:,t_overlap,:] = 0 + + + if len(track_set)>0: + search_idx = 
np.where(np.logical_and(track_set[:,0]==track_id1, track_set[:,1]==track_id2)) + if len(search_idx[0])>0: + #save_fea_mat[search_idx[0][0],:,:,:] = comb_fea_mat[n,:,:,:] + if track_set[search_idx[0][0],2]==1: + comb_fea_label[cnt,2] = 1 + else: + comb_fea_label[cnt,3] = 1 + + + if len(remove_ids)>0: + comb_fea_mat = np.delete(comb_fea_mat, np.array(remove_ids), axis=0) + comb_fea_label = np.delete(comb_fea_label, np.array(remove_ids), axis=0) + + if len(comb_fea_mat)>0: + + comb_fea_mat = track_lib.interp_batch(comb_fea_mat) + + max_batch_size = 16 + num_batch = int(np.ceil(comb_fea_mat.shape[0]/max_batch_size)) + pred_y = np.zeros((comb_fea_mat.shape[0],2)) + for n in range(num_batch): + if n!=num_batch-1: + batch_size = max_batch_size + else: + batch_size = int(comb_fea_mat.shape[0]-(num_batch-1)*max_batch_size) + + # batch_size = comb_fea_mat.shape[0] + x = np.zeros((batch_size,1,max_length,1)) + y = np.zeros((batch_size,1,max_length,1)) + w = np.zeros((batch_size,1,max_length,1)) + h = np.zeros((batch_size,1,max_length,1)) + ap = np.zeros((batch_size,feature_size-4,max_length,1)) + mask_1 = np.zeros((batch_size,1,max_length,2)) + mask_2 = np.zeros((batch_size,feature_size-4,max_length,2)) + x[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,0] + y[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,1,:,0] + w[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,2,:,0] + h[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,3,:,0] + ap[:,:,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,0] + mask_1[:,0,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,1:] + mask_2[:,:,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,1:] + pred_y[n*max_batch_size:n*max_batch_size+batch_size,:] = sess.run(y_conv, feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: np.zeros((batch_size,2)), + keep_prob: 1.0}) + + for n in range(len(pred_y)): + if np.sum(comb_fea_label[n,2:4])>0: + continue + if pred_y[n,0]>pred_y[n,1]: + comb_fea_label[n,2] = 1 + else: + comb_fea_label[n,3] = 1 + + if comb_fea_mat.shape[0]!=len(pred_y): + import pdb; pdb.set_trace() + + ''' + all_fea_mat[fea_id:fea_id+len(pred_y),:,:,:] = comb_fea_mat + all_fea_label[fea_id:fea_id+len(pred_y),:] = comb_fea_label + ''' + + + cost = cost+np.sum(pred_y[:,1]-pred_y[:,0]) + #import pdb; pdb.set_trace() + + if pred_y.shape[0]!=len(temp_cost_list): + import pdb; pdb.set_trace() + for n in range(pred_y.shape[0]): + + tracklet_mat['comb_track_cost_mask'][temp_cost_list[n][0],temp_cost_list[n][1]] = 1 + tracklet_mat['comb_track_cost'][temp_cost_list[n][0],temp_cost_list[n][1]] = pred_y[n,1]-pred_y[n,0] + + return cost + +def get_split_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + new_cluster_cost = np.zeros((2,1)) + if len(tracklet_mat['track_cluster'][track_id])<2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_interval = tracklet_mat['track_interval'].copy() + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append([track_id]) + remain_tracks = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + 
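+    # split move: the selected tracklet becomes a singleton cluster and the
+    # rest of its current cluster stays together as the second new cluster
+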
remain_tracks.remove(track_id) + new_cluster_set.append(remain_tracks) + + # get cost + if len(remain_tracks)>1: + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[1]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[1][sort_idx[n]] + track_id2 = new_cluster_set[1][sort_idx[n+1]] + #if track_id1==42: + # import pdb; pdb.set_trace() + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + #********************************* + new_cluster_cost[1,0] = comb_cost(remain_tracks, sess) + + # cross cost + comb_cluster = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + cross_cost = np.zeros((2,1)) + + cost = np.sum(new_cluster_cost)-cross_cost[1,0] + prev_cost = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]]-cross_cost[0,0] + diff_cost = cost-prev_cost + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_assign_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + + #import pdb; pdb.set_trace() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + new_cluster_cost = np.zeros((2,1)) + new_cluster_set = [] + new_cluster_set.append(cluster1.copy()) + new_cluster_set[0].remove(track_id) + track_interval = tracklet_mat['track_interval'].copy() + # get cost + if len(new_cluster_set[0])>1: + + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[0]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[0][sort_idx[n]] + track_id2 = new_cluster_set[0][sort_idx[n+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], sess) + + track_class = track_struct['sub_tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['sub_tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + temp_new_cluster_cost = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + for mm in range(N_cluster): + n = track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n] + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster2.copy() + temp_cluster_set.append(track_id) + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + #if cluster2[m] in remove_set: + # remove_flag = 1 + # break + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + if neighbor_flag==0 or conflict_flag==1:# or 
remove_flag==1: + continue + + # get cost + temp_set = cluster2.copy() + temp_set.append(track_id) + temp_new_cluster_cost[n,0] = comb_cost(temp_set, sess) + + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + + cost_vec = temp_new_cluster_cost[:,0]+new_cluster_cost[0,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost[1,0] = temp_new_cluster_cost[min_idx,0] + change_cluster_idx = [tracklet_mat['track_class'][track_id],min_idx] + temp_set = tracklet_mat['track_cluster'][min_idx].copy() + temp_set.append(track_id) + new_cluster_set.append(temp_set) + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_merge_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)==1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['sub_tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['sub_tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + new_cluster_cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n].copy() + if len(cluster2)<=1: + continue + + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + + if neighbor_flag==0 or conflict_flag==1:# or remove_flag==1: + continue + + + # get cost + new_cluster_cost_vec[n,0] = comb_cost(cluster1+cluster2, sess) + + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = new_cluster_cost_vec[:,0]-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = new_cluster_cost_vec[min_idx,0] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx 
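+    # best merge partner found: keep the merged cluster's cost in slot 0 and
+    # leave the second cluster empty (the partner cluster is absorbed)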
+ + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = cost + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + temp_set = cluster1.copy() + temp_set = temp_set+tracklet_mat['track_cluster'][min_idx] + new_cluster_set.append(temp_set) + new_cluster_set.append([]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_switch_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + S1 = [] + S2 = [] + for k in range(len(cluster1)): + temp_id = cluster1[k] + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S1.append(temp_id) + else: + S2.append(temp_id) + if len(S1)==0 or len(S2)==0: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['sub_tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['sub_tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + new_cluster_cost_vec1 = float("inf")*np.ones((NN_cluster,1)) + new_cluster_cost_vec2 = float("inf")*np.ones((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + track_id_set = [] + for n in range(NN_cluster): + track_id_set.append([]) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # switch availability check + S3 = [] + S4 = [] + #remove_flag = 0 + cluster2 = tracklet_mat['track_cluster'][n].copy() + for k in range(len(cluster2)): + temp_id = cluster2[k] + + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S3.append(temp_id) + else: + #******************************************** + if tracklet_mat['track_interval'][temp_id,1] >=tracklet_mat['track_interval'][track_id,1] \ + and tracklet_mat['track_interval'][temp_id,0] <=tracklet_mat['track_interval'][track_id,1]: + if tracklet_mat['track_interval'][temp_id,1] -tracklet_mat['track_interval'][track_id,1] \ + >tracklet_mat['track_interval'][track_id,1]-tracklet_mat['track_interval'][temp_id,0]: + S4.append(temp_id) + else: + S3.append(temp_id) + else: + S4.append(temp_id) + + neighbor_flag1 = 1 + conflict_flag1 = 0 + if len(S3)==0: + neighbor_flag1 = 1 + conflict_flag1 = 0 + else: + temp_cluster_set = S3+S2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag1 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag1 = 1 + break + + + neighbor_flag2 = 1 + conflict_flag2 = 0 + if len(S4)==0: + neighbor_flag2 = 1 + conflict_flag2 = 0 + else: + temp_cluster_set = S4+S1 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in 
range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag2 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag2 = 1 + break + + if neighbor_flag1==0 or conflict_flag1==1 or neighbor_flag2==0 or conflict_flag2==1: + continue + + # get cost + S_1 = S1+S4 + S_2 = S2+S3 + + new_cluster_cost_vec1[n,0] = comb_cost(S_1, sess) + + new_cluster_cost_vec2[n,0] = comb_cost(S_2, sess) + + cost_vec[n,0] = new_cluster_cost_vec1[n,0]+new_cluster_cost_vec2[n,0] + + track_id_set[n].append(S_1.copy()) + track_id_set[n].append(S_2.copy()) + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + cost_vec = cost_vec[:,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = new_cluster_cost_vec1[min_idx,0] + new_cluster_cost[1,0] = new_cluster_cost_vec2[min_idx,0] + + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + new_cluster_set.append(track_id_set[min_idx][0]) + new_cluster_set.append(track_id_set[min_idx][1]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_break_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + + new_cluster_cost = np.zeros((2,1)) + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)<=2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + # get cost + after_ids = [] + for n in range(len(cluster1)): + if tracklet_mat['track_interval'][cluster1[n],1]>tracklet_mat['track_interval'][track_id,1]: + after_ids.append(cluster1[n]) + + if len(after_ids)==0: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + before_ids = list(set(cluster1)-set(after_ids)) + if len(before_ids)<=1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append(before_ids) + remain_tracks = after_ids + new_cluster_set.append(remain_tracks) + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], sess) + + new_cluster_cost[1,0] = comb_cost(new_cluster_set[1], sess) + + cost = np.sum(new_cluster_cost) + diff_cost = cost-tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def copy_sub_mat(): + global track_struct + track_struct['sub_tracklet_mat'] = {} + cand_track_idx = 
np.where(track_struct['tracklet_mat']['track_id_mat']!=-1)[0] + track_struct['sub_tracklet_mat']['xmin_mat'] = track_struct['tracklet_mat']['xmin_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['ymin_mat'] = track_struct['tracklet_mat']['ymin_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['xmax_mat'] = track_struct['tracklet_mat']['xmax_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['ymax_mat'] = track_struct['tracklet_mat']['ymax_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['x_3d_mat'] = track_struct['tracklet_mat']['x_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['y_3d_mat'] = track_struct['tracklet_mat']['y_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['w_3d_mat'] = track_struct['tracklet_mat']['w_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['h_3d_mat'] = track_struct['tracklet_mat']['h_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['det_score_mat'] = track_struct['tracklet_mat']['det_score_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['track_interval'] = track_struct['tracklet_mat']['track_interval'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['obj_id_mat'] = track_struct['tracklet_mat']['obj_id_mat'][cand_track_idx].copy() + track_struct['sub_tracklet_mat']['track_id_mat'] = track_struct['tracklet_mat']['track_id_mat'][cand_track_idx].copy() + #track_struct['sub_tracklet_mat']['save_obj_id_mask'] = track_struct['tracklet_mat']['save_obj_id_mask'].copy() + #track_struct['sub_tracklet_mat']['assigned_obj_id_mask'] = track_struct['tracklet_mat']['assigned_obj_id_mask'].copy() + + # update comb_track_cost + change_idx = np.zeros(track_struct['track_params']['num_track'], dtype=int) + for n in range(track_struct['track_params']['num_track']): + if track_struct['tracklet_mat']['track_interval'][n,1]-track_struct['tracklet_mat']['track_interval'][n,0] \ + !=track_struct['tracklet_mat']['prev_track_interval'][n,1]-track_struct['tracklet_mat']['prev_track_interval'][n,0] \ + or (track_struct['tracklet_mat']['track_interval'][n,0]==0 + and track_struct['tracklet_mat']['prev_track_interval'][n,0]==0 + and track_struct['tracklet_mat']['track_interval'][n,1]==track_struct['track_params']['num_fr']-1 + and track_struct['tracklet_mat']['prev_track_interval'][n,1]==track_struct['track_params']['num_fr']-1): + change_idx[n] = 1 + + track_struct['tracklet_mat']['comb_track_cost'][change_idx==1,:] = 0 + track_struct['tracklet_mat']['comb_track_cost'][:,change_idx==1] = 0 + track_struct['tracklet_mat']['comb_track_cost_mask'][change_idx==1,:] = 0 + track_struct['tracklet_mat']['comb_track_cost_mask'][:,change_idx==1] = 0 + + temp_mat = track_struct['tracklet_mat']['comb_track_cost'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['comb_track_cost'] = temp_mat[:,cand_track_idx].copy() + + temp_mat = track_struct['tracklet_mat']['comb_track_cost_mask'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['comb_track_cost_mask'] = temp_mat[:,cand_track_idx].copy() + + fea_cand_idx = np.where(track_struct['tracklet_mat']['appearance_fea_mat'][:,0]!=-1)[0] + track_struct['sub_tracklet_mat']['appearance_fea_mat'] = track_struct['tracklet_mat']['appearance_fea_mat'][fea_cand_idx,:].copy() + + # update track_id for sub_tracklet_mat + for n in range(len(cand_track_idx)): + temp_idx = np.where(track_struct['sub_tracklet_mat']['appearance_fea_mat'][:,0]==cand_track_idx[n])[0] + 
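+        # remap the appearance rows from global tracklet ids to the compact
+        # sub-matrix index n so that comb_cost can index them directly
+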
track_struct['sub_tracklet_mat']['appearance_fea_mat'][temp_idx,0] = n + + return + +def init_clustering(): + + global track_struct + + # copy the sub tracklet_mat + copy_sub_mat() + + N_tracklet = track_struct['sub_tracklet_mat']['xmin_mat'].shape[0] + + # track cluster + track_struct['sub_tracklet_mat']['track_cluster'] = [] + + # track class + track_struct['sub_tracklet_mat']['track_class'] = np.arange(N_tracklet, dtype=int) + + # time cluster + track_struct['sub_tracklet_mat']['time_cluster'] = [] + for n in range(track_struct['track_params']['num_time_cluster']): + track_struct['sub_tracklet_mat']['time_cluster'].append([]) + + track_struct['sub_tracklet_mat']['track_cluster_t_idx'] = [] + for n in range(N_tracklet): + idx = np.where(track_struct['sub_tracklet_mat']['xmax_mat'][n,:]!=-1)[0] + if len(idx)==0: + import pdb; pdb.set_trace() + track_struct['sub_tracklet_mat']['track_interval'][n,0] = np.min(idx) + track_struct['sub_tracklet_mat']['track_interval'][n,1] = np.max(idx) + track_struct['sub_tracklet_mat']['track_cluster'].append([n]) + + if n in remove_set: + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append([-1]) + else: + min_time_cluster_idx = int(np.floor(max(track_struct['sub_tracklet_mat']['track_interval'][n,0] + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(track_struct['sub_tracklet_mat']['track_interval'][n,1] + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['sub_tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + for k in range(min_time_cluster_idx,max_time_cluster_idx+1): + track_struct['sub_tracklet_mat']['time_cluster'][k].append(n) + + # get center position of each detection location + mask = track_struct['sub_tracklet_mat']['xmin_mat']==-1 + track_struct['sub_tracklet_mat']['center_x'] = \ + (track_struct['sub_tracklet_mat']['xmin_mat']+track_struct['sub_tracklet_mat']['xmax_mat'])/2 + track_struct['sub_tracklet_mat']['center_y'] = \ + (track_struct['sub_tracklet_mat']['ymin_mat']+track_struct['sub_tracklet_mat']['ymax_mat'])/2 + track_struct['sub_tracklet_mat']['w'] = \ + track_struct['sub_tracklet_mat']['xmax_mat']-track_struct['sub_tracklet_mat']['xmin_mat']+1 + track_struct['sub_tracklet_mat']['h'] = \ + track_struct['sub_tracklet_mat']['ymax_mat']-track_struct['sub_tracklet_mat']['ymin_mat']+1 + track_struct['sub_tracklet_mat']['center_x'][mask] = -1 + track_struct['sub_tracklet_mat']['center_y'][mask] = -1 + track_struct['sub_tracklet_mat']['w'][mask] = -1 + track_struct['sub_tracklet_mat']['h'][mask] = -1 + + # neighbor_track_idx and conflict_track_idx + track_struct['sub_tracklet_mat']['neighbor_track_idx'] = [] + track_struct['sub_tracklet_mat']['conflict_track_idx'] = [] + for n in range(N_tracklet): + track_struct['sub_tracklet_mat']['neighbor_track_idx'].append([]) + track_struct['sub_tracklet_mat']['conflict_track_idx'].append([]) + for n in range(N_tracklet-1): + for m in range(n+1, N_tracklet): + t_min1 = track_struct['sub_tracklet_mat']['track_interval'][n,0] + t_max1 = track_struct['sub_tracklet_mat']['track_interval'][n,1] + t_min2 = track_struct['sub_tracklet_mat']['track_interval'][m,0] + t_max2 = track_struct['sub_tracklet_mat']['track_interval'][m,1] + overlap_len = min(t_max2,t_max1)-max(t_min1,t_min2)+1 + overlap_r = 
overlap_len/(t_max1-t_min1+1+t_max2-t_min2+1-overlap_len)
+            if overlap_len>0 and overlap_r>track_struct['track_params']['track_overlap_thresh']:
+                track_struct['sub_tracklet_mat']['conflict_track_idx'][n].append(m)
+                track_struct['sub_tracklet_mat']['conflict_track_idx'][m].append(n)
+                continue
+            if overlap_len>0 and overlap_r<=track_struct['track_params']['track_overlap_thresh']:
+                # check the search region
+                t1 = int(max(t_min1,t_min2))
+                t2 = int(min(t_max2,t_max1))
+                if (t_min1<=t_min2 and t_max1>=t_max2) or (t_min1>=t_min2 and t_max1<=t_max2) or overlap_len>4:
+                    track_struct['sub_tracklet_mat']['conflict_track_idx'][n].append(m)
+                    track_struct['sub_tracklet_mat']['conflict_track_idx'][m].append(n)
+                    continue
+
+                cand_t = np.array(range(t1,t2+1))
+                dist_x = abs(track_struct['sub_tracklet_mat']['center_x'][n,cand_t] \
+                            -track_struct['sub_tracklet_mat']['center_x'][m,cand_t])
+                dist_y = abs(track_struct['sub_tracklet_mat']['center_y'][n,cand_t] \
+                            -track_struct['sub_tracklet_mat']['center_y'][m,cand_t])
+                w1 = track_struct['sub_tracklet_mat']['w'][n,cand_t]
+                h1 = track_struct['sub_tracklet_mat']['h'][n,cand_t]
+                w2 = track_struct['sub_tracklet_mat']['w'][m,cand_t]
+                h2 = track_struct['sub_tracklet_mat']['h'][m,cand_t]
+
+                min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)])
+                min_dist_x1 = np.min(dist_x/min_len)
+                min_dist_y1 = np.min(dist_y/min_len)
+                min_dist_x2 = np.min(dist_x/min_len)
+                min_dist_y2 = np.min(dist_y/min_len)
+                # the comparison below was corrupted in extraction; assumed
+                # reconstruction: overlapping tracklets whose normalized center
+                # distance stays within the search radius become neighbors (the
+                # parameter name 'search_radius' is an assumption)
+                if min_dist_x1<track_struct['track_params']['search_radius'] and \
+                   min_dist_y1<track_struct['track_params']['search_radius']:
+                    track_struct['sub_tracklet_mat']['neighbor_track_idx'][n].append(m)
+                    track_struct['sub_tracklet_mat']['neighbor_track_idx'][m].append(n)
+            else:
+                # tracklets with no temporal overlap: pick the end points that
+                # face each other across the gap
+                if t_min1>=t_max2:
+                    t1 = int(t_min1)
+                    t2 = int(t_max2)
+                else:
+                    t1 = int(t_max1)
+                    t2 = int(t_min2)
+
+                #***********************************
+                tr_t1 = np.array(range(int(t_min1),int(t_max1+1)))
+                tr_x1 = track_struct['sub_tracklet_mat']['center_x'][n,int(t_min1):int(t_max1+1)]
+                tr_y1 = track_struct['sub_tracklet_mat']['center_y'][n,int(t_min1):int(t_max1+1)]
+                if len(tr_t1)>10:
+                    if t_min1>=t_max2:
+                        tr_t1 = tr_t1[0:10]
+                        tr_x1 = tr_x1[0:10]
+                        tr_y1 = tr_y1[0:10]
+                    else:
+                        tr_t1 = tr_t1[-10:]
+                        tr_x1 = tr_x1[-10:]
+                        tr_y1 = tr_y1[-10:]
+                ts_x1 = track_lib.linear_pred_v2(tr_t1, tr_x1, np.array([t2]))
+                ts_y1 = track_lib.linear_pred_v2(tr_t1, tr_y1, np.array([t2]))
+                dist_x1 = abs(ts_x1[0]-track_struct['sub_tracklet_mat']['center_x'][m,t2])
+                dist_y1 = abs(ts_y1[0]-track_struct['sub_tracklet_mat']['center_y'][m,t2])
+
+                tr_t2 = np.array(range(int(t_min2),int(t_max2+1)))
+                tr_x2 = track_struct['sub_tracklet_mat']['center_x'][m,int(t_min2):int(t_max2+1)]
+                tr_y2 = track_struct['sub_tracklet_mat']['center_y'][m,int(t_min2):int(t_max2+1)]
+                if len(tr_t2)>10:
+                    if t_min2>t_max1:
+                        tr_t2 = tr_t2[0:10]
+                        tr_x2 = tr_x2[0:10]
+                        tr_y2 = tr_y2[0:10]
+                    else:
+                        tr_t2 = tr_t2[-10:]
+                        tr_x2 = tr_x2[-10:]
+                        tr_y2 = tr_y2[-10:]
+
+                ts_x2 = track_lib.linear_pred_v2(tr_t2, tr_x2, np.array([t1]))
+                ts_y2 = track_lib.linear_pred_v2(tr_t2, tr_y2, np.array([t1]))
+                dist_x2 = abs(ts_x2[0]-track_struct['sub_tracklet_mat']['center_x'][n,t1])
+                dist_y2 = abs(ts_y2[0]-track_struct['sub_tracklet_mat']['center_y'][n,t1])
+
+                dist_x = min(dist_x1, dist_x2)
+                dist_y = min(dist_y1, dist_y2)
+                #***********************************
+
+                w1 = track_struct['sub_tracklet_mat']['w'][n,t1]
+                h1 = track_struct['sub_tracklet_mat']['h'][n,t1]
+                w2 = track_struct['sub_tracklet_mat']['w'][m,t2]
+                h2 = track_struct['sub_tracklet_mat']['h'][m,t2]
+
+                min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)])
+                min_dist_x1 = dist_x/min_len
+                min_dist_y1 = dist_y/min_len
+                min_dist_x2 = dist_x/min_len
+                min_dist_y2 = dist_y/min_len
+
+                # corrupted comparison, reconstructed under the same assumption
+                # as above
+                if min_dist_x1<track_struct['track_params']['search_radius'] and \
+                   min_dist_y1<track_struct['track_params']['search_radius']:
+                    track_struct['sub_tracklet_mat']['neighbor_track_idx'][n].append(m)
+                    track_struct['sub_tracklet_mat']['neighbor_track_idx'][m].append(n)
+
+# (a span is missing here in the source extraction: the end of init_clustering
+# and the head of the clustering update, which evaluates get_split_cost,
+# get_assign_cost, get_merge_cost, get_switch_cost and get_break_cost for each
+# track_id and gathers diff_cost_vec, new_C, new_set and change_idx used below)
+        min_idx = np.argmin(diff_cost_vec)
+        if diff_cost_vec[min_idx]>=0:
+            continue
+
+        change_flag = 1
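+        # a cost-reducing move was found; the block below rebuilds the
+        # time-cluster index ranges of the two affected clusters and commits
+        # the new cluster membership, costs and track classes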
+ #**************** + #import pdb; pdb.set_trace() + # print(min_idx) + # print(new_set) + new_t_idx = [] + if len(new_set[min_idx][0])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][0]),1)) + t_max_array = np.zeros((len(new_set[min_idx][0]),1)) + for m in range(len(new_set[min_idx][0])): + t_min_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][0][m],0] + t_max_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][0][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['sub_tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if len(new_set[min_idx][1])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][1]),1)) + t_max_array = np.zeros((len(new_set[min_idx][1]),1)) + for m in range(len(new_set[min_idx][1])): + t_min_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][1][m],0] + t_max_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][1][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['sub_tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if change_idx[min_idx][0]>=len(track_struct['sub_tracklet_mat']['track_cluster']): + for m in range(len(track_struct['sub_tracklet_mat']['track_cluster']),change_idx[min_idx][0]): + track_struct['sub_tracklet_mat']['track_cluster'].append([]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['sub_tracklet_mat']['track_cluster'].append(new_set[min_idx][0]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[0]) + else: + track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][0]] = new_set[min_idx][0] + track_struct['sub_tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][0]] = new_t_idx[0] + + if change_idx[min_idx][1]>=len(track_struct['sub_tracklet_mat']['track_cluster']): + for m in range(len(track_struct['sub_tracklet_mat']['track_cluster']),change_idx[min_idx][1]): + track_struct['sub_tracklet_mat']['track_cluster'].append([]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['sub_tracklet_mat']['track_cluster'].append(new_set[min_idx][1]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[1]) + else: + track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][1]] = new_set[min_idx][1] + track_struct['sub_tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][1]] = new_t_idx[1] + + #import pdb; pdb.set_trace() + for m in range(track_struct['track_params']['num_time_cluster']): + #import pdb; pdb.set_trace() + if change_idx[min_idx][0] in track_struct['sub_tracklet_mat']['time_cluster'][m]: + 
track_struct['sub_tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][0]) + if change_idx[min_idx][1] in track_struct['sub_tracklet_mat']['time_cluster'][m]: + track_struct['sub_tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][1]) + + for m in range(track_struct['track_params']['num_time_cluster']): + if m in new_t_idx[0]: + track_struct['sub_tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][0]) + if m in new_t_idx[1]: + track_struct['sub_tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][1]) + + if change_idx[min_idx][0]>=len(track_struct['sub_tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['sub_tracklet_mat']['cluster_cost']),change_idx[min_idx][0]): + track_struct['sub_tracklet_mat']['cluster_cost'].append(0) + track_struct['sub_tracklet_mat']['cluster_cost'].append(new_C[min_idx][0]) + else: + track_struct['sub_tracklet_mat']['cluster_cost'][change_idx[min_idx][0]] = new_C[min_idx][0] + + if change_idx[min_idx][1]>=len(track_struct['sub_tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['sub_tracklet_mat']['cluster_cost']),change_idx[min_idx][1]): + track_struct['sub_tracklet_mat']['cluster_cost'].append([]) + track_struct['sub_tracklet_mat']['cluster_cost'].append(new_C[min_idx][1]) + else: + track_struct['sub_tracklet_mat']['cluster_cost'][change_idx[min_idx][1]] = new_C[min_idx][1] + + for k in range(len(track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][0]])): + track_struct['sub_tracklet_mat']['track_class'][track_struct['sub_tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][0]][k]] = change_idx[min_idx][0] + + for k in range(len(track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][1]])): + track_struct['sub_tracklet_mat']['track_class'][track_struct['sub_tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][1]][k]] = change_idx[min_idx][1] + #import pdb; pdb.set_trace() + return change_flag + +def feature_encode(sess, image_paths, batch_size): + + # Run forward pass to calculate embeddings + #print('Runnning forward pass on LFW images') + + use_flipped_images = False + use_fixed_image_standardization = False + use_random_rotate = False + use_radnom_crop = False + # Enqueue one epoch of image paths and labels + nrof_embeddings = len(image_paths) # nrof_pairs * nrof_images_per_pair + nrof_flips = 2 if use_flipped_images else 1 + nrof_images = nrof_embeddings * nrof_flips + labels_array = np.expand_dims(np.arange(0,nrof_images),1) + image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1) + control_array = np.zeros_like(labels_array, np.int32) + + if use_fixed_image_standardization: + control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION + if use_flipped_images: + # Flip every second image + control_array += (labels_array % 2)*facenet.FLIP + if use_random_rotate: + control_array += facenet.RANDOM_ROTATE + if use_radnom_crop: + control_array += facenet.RANDOM_CROP + + sess.run(eval_enqueue_op, {image_paths_placeholder: image_paths_array, + labels_placeholder: labels_array, control_placeholder: control_array}) + + embedding_size = int(embeddings.get_shape()[1]) + assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' + nrof_batches = nrof_images // batch_size + emb_array = np.zeros((nrof_images, embedding_size)) + lab_array = np.zeros((nrof_images,)) + for i in range(nrof_batches): + feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size} 
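+        # dequeue one batch; 'label_batch' returns the enqueue order, so each
+        # embedding can be scattered back to its original row below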
+ emb, lab = sess.run([embeddings, label_batch], feed_dict=feed_dict) + lab_array[lab] = lab + emb_array[lab, :] = emb + if i % 10 == 9: + # print('.', end='') + sys.stdout.flush() + #import pdb; pdb.set_trace() + #np.savetxt("emb_array.csv", emb_array, delimiter=",") + return emb_array + +def crop_det(det_M, img): + global track_struct + crop_det_folder = track_struct['file_path']['crop_det_folder'] + crop_size = track_struct['track_params']['crop_size'] + if not os.path.isdir(crop_det_folder): + os.makedirs(crop_det_folder) + + save_patch_list = [] + for n in range(len(det_M)): + xmin = int(max(0,det_M[n,1])) + xmax = int(min(img.shape[1]-1,det_M[n,1]+det_M[n,3])) + ymin = int(max(0,det_M[n,2])) + ymax = int(min(img.shape[0]-1,det_M[n,2]+det_M[n,4])) + img_patch = img[ymin:ymax,xmin:xmax,:] + img_patch = misc.imresize(img_patch, size=[crop_size,crop_size]) + patch_name = track_lib.file_name(n,4)+'.png' + save_path = crop_det_folder+'/'+patch_name + misc.imsave(save_path, img_patch) + save_patch_list.append(save_path) + + return save_patch_list + +def init_tracklet_model(): + global track_struct + global tracklet_graph + global tracklet_sess + + global batch_X_x + global batch_X_y + global batch_X_w + global batch_X_h + global batch_X_a + global batch_mask_1 + global batch_mask_2 + global batch_Y + global keep_prob + global y_conv + + max_length = track_struct['track_params']['max_length'] + batch_size = track_struct['track_params']['batch_size'] + feature_size = track_struct['track_params']['feature_size'] + num_classes = track_struct['track_params']['num_classes'] + + # build tracklet graph + tracklet_graph = tf.Graph() + with tracklet_graph.as_default(): + # load nn + batch_X_x = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_y = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_w = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_h = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_a = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 1]) + batch_mask_1 = tf.placeholder(tf.float32, [None, 1, max_length, 2]) + batch_mask_2 = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 2]) + batch_Y = tf.placeholder(tf.int32, [None, num_classes]) + keep_prob = tf.placeholder(tf.float32) + + y_conv = seq_nn_3d_v2.seq_nn(batch_X_x,batch_X_y,batch_X_w,batch_X_h,batch_X_a,batch_mask_1, + batch_mask_2,batch_Y,max_length,feature_size,keep_prob) + + cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=batch_Y, logits=y_conv)) + train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) + correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(batch_Y, 1)) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + + tracklet_init = tf.global_variables_initializer() + tracklet_saver = tf.train.Saver() + + tracklet_sess = tf.Session(graph=tracklet_graph) + with tracklet_sess.as_default(): + tracklet_saver.restore(tracklet_sess, track_struct['file_path']['seq_model']) + print("Tracklet model restored.") + return + +def init_triplet_model(): + global track_struct + global triplet_graph + global triplet_sess + + global eval_enqueue_op + global image_paths_placeholder + global labels_placeholder + global phase_train_placeholder + global batch_size_placeholder + global control_placeholder + global embeddings + global label_batch + global distance_metric + f_image_size = 160 + distance_metric = 0 + + triplet_graph = tf.Graph() + with triplet_graph.as_default(): + 
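# placeholders that feed the FIFO evaluation queue of the appearance network
+        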
image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') + labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') + batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') + control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') + phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') + + nrof_preprocess_threads = 4 + image_size = (f_image_size, f_image_size) + eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000, + dtypes=[tf.string, tf.int32, tf.int32], + shapes=[(1,), (1,), (1,)], + shared_name=None, name=None) + eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder, + labels_placeholder, control_placeholder], + name='eval_enqueue_op') + image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size, + nrof_preprocess_threads, batch_size_placeholder) + triplet_sess = tf.Session(graph=triplet_graph) + with triplet_sess.as_default(): + with triplet_graph.as_default(): + # Load the model + input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder} + facenet.load_model(track_struct['file_path']['triplet_model'], input_map=input_map) + + # Get output tensor + embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=triplet_sess) + return + +def TC_online(det_M, img, t_pointer, fr_idx, end_flag): + global track_struct + global triplet_graph + global triplet_sess + global tracklet_graph + global tracklet_sess + + prev_t_pointer = t_pointer + num_bbox = len(det_M) + #print(num_bbox) + track_struct['track_params']['img_size'] = img.shape + track_struct['tracklet_mat']['imgs'].append(img) + + # last frame in the time window + max_track_id = np.max(track_struct['tracklet_mat']['track_id_mat']) + if t_pointer==track_struct['track_params']['num_fr']: + #import pdb; pdb.set_trace() + # save tracking to file + # fr_id, obj_id, track_id, x, y, w, h, x_3d, y_3d, w_3d, h_3d, det_score + track_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][:,0]!=-1)[0] + num_save_id = len(track_idx) + if num_save_id!=0: + save_mat = np.zeros((num_save_id, 12)) + save_mat[:,0] = int(fr_idx-track_struct['track_params']['num_fr']) + save_mat[:,1] = track_struct['tracklet_mat']['obj_id_mat'][track_idx] + track_struct['tracklet_mat']['save_obj_id_mask'][save_mat[:,1].astype(int)] = 1 + save_mat[:,2] = track_struct['tracklet_mat']['track_id_mat'][track_idx] + save_mat[:,3] = track_struct['tracklet_mat']['xmin_mat'][track_idx,0] + save_mat[:,4] = track_struct['tracklet_mat']['ymin_mat'][track_idx,0] + save_mat[:,5] = track_struct['tracklet_mat']['xmax_mat'][track_idx,0] \ + -track_struct['tracklet_mat']['xmin_mat'][track_idx,0] + save_mat[:,6] = track_struct['tracklet_mat']['ymax_mat'][track_idx,0] \ + -track_struct['tracklet_mat']['ymin_mat'][track_idx,0] + save_mat[:,7] = track_struct['tracklet_mat']['x_3d_mat'][track_idx,0] + save_mat[:,8] = track_struct['tracklet_mat']['y_3d_mat'][track_idx,0] + save_mat[:,9] = track_struct['tracklet_mat']['w_3d_mat'][track_idx,0] + save_mat[:,10] = track_struct['tracklet_mat']['h_3d_mat'][track_idx,0] + save_mat[:,11] = track_struct['tracklet_mat']['det_score_mat'][track_idx,0] + #save_mat[:,12] = track_struct['tracklet_mat']['class_name'][track_idx,0] + #save_mat[:,13] = track_struct['tracklet_mat']['dist2cam'][track_idx,0] + + #import pdb; pdb.set_trace() + f = 
open(track_struct['file_path']['txt_result_path'], 'a') + np.savetxt(f, save_mat, delimiter=',') + f.close() + wrt_missing_det(save_mat) + + else: + save_mat = [] + + #draw_result(track_struct['tracklet_mat']['imgs'][0], save_mat, fr_idx-track_struct['track_params']['num_fr']) + #del track_struct['tracklet_mat']['imgs'][0] + + # Slide the time window + track_struct['tracklet_mat']['xmin_mat'][:,:-1] = track_struct['tracklet_mat']['xmin_mat'][:,1:] + track_struct['tracklet_mat']['xmin_mat'][:,-1] = -1 + track_struct['tracklet_mat']['ymin_mat'][:,:-1] = track_struct['tracklet_mat']['ymin_mat'][:,1:] + track_struct['tracklet_mat']['ymin_mat'][:,-1] = -1 + track_struct['tracklet_mat']['xmax_mat'][:,:-1] = track_struct['tracklet_mat']['xmax_mat'][:,1:] + track_struct['tracklet_mat']['xmax_mat'][:,-1] = -1 + track_struct['tracklet_mat']['ymax_mat'][:,:-1] = track_struct['tracklet_mat']['ymax_mat'][:,1:] + track_struct['tracklet_mat']['ymax_mat'][:,-1] = -1 + track_struct['tracklet_mat']['x_3d_mat'][:,:-1] = track_struct['tracklet_mat']['x_3d_mat'][:,1:] + track_struct['tracklet_mat']['x_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['y_3d_mat'][:,:-1] = track_struct['tracklet_mat']['y_3d_mat'][:,1:] + track_struct['tracklet_mat']['y_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['w_3d_mat'][:,:-1] = track_struct['tracklet_mat']['w_3d_mat'][:,1:] + track_struct['tracklet_mat']['w_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['h_3d_mat'][:,:-1] = track_struct['tracklet_mat']['h_3d_mat'][:,1:] + track_struct['tracklet_mat']['h_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['det_score_mat'][:,:-1] = track_struct['tracklet_mat']['det_score_mat'][:,1:] + track_struct['tracklet_mat']['det_score_mat'][:,-1] = -1 + track_struct['tracklet_mat']['track_interval'] = track_struct['tracklet_mat']['track_interval']-1 + track_struct['tracklet_mat']['track_interval'][track_struct['tracklet_mat']['track_interval'][:,0]<0,0] = 0 + track_struct['tracklet_mat']['track_interval'][track_struct['tracklet_mat']['track_interval'][:,1]<0,0] = -1 + track_struct['tracklet_mat']['track_interval'][track_struct['tracklet_mat']['track_interval'][:,1]<0,1] = -1 + + track_struct['tracklet_mat']['obj_id_mat'][track_struct['tracklet_mat']['track_interval'][:,1]==-1] = -1 + track_struct['tracklet_mat']['track_id_mat'][track_struct['tracklet_mat']['track_interval'][:,1]==-1] = -1 + + #track_struct['tracklet_mat']['class_name'][:,:-1] = track_struct['tracklet_mat']['class_name'][:,1:] + #track_struct['tracklet_mat']['class_name'][:,-1] = -1 + #track_struct['tracklet_mat']['dist2cam'][:,:-1] = track_struct['tracklet_mat']['dist2cam'][:,1:] + #track_struct['tracklet_mat']['dist2cam'][:,-1] = -1 + + t_pointer = t_pointer-1 + + remove_fr_idx = fr_idx-track_struct['track_params']['num_fr'] + remove_fea_idx = np.where(track_struct['tracklet_mat']['appearance_fea_mat'][:,1]==remove_fr_idx)[0] + track_struct['tracklet_mat']['appearance_fea_mat'][remove_fea_idx,:] = -1 + + track_struct['tracklet_mat']['assigned_obj_id_mask'] = track_struct['tracklet_mat']['save_obj_id_mask'].copy() + assigned_ids = track_struct['tracklet_mat']['obj_id_mat'][track_struct['tracklet_mat']['obj_id_mat']!=-1] + track_struct['tracklet_mat']['assigned_obj_id_mask'][assigned_ids] = 1 + avai_ids = np.where(track_struct['tracklet_mat']['assigned_obj_id_mask']==0)[0] + + #if fr_idx-track_struct['track_params']['num_fr']==214: + # import pdb; pdb.set_trace() + empty_idx = np.where(track_struct['tracklet_mat']['track_id_mat']==-1)[0] + empty_fea_idx = 
np.where(track_struct['tracklet_mat']['appearance_fea_mat'][:,0]==-1)[0] + + # crop detection results and extract cnn features + if num_bbox!=0: + patch_list = crop_det(det_M, img) + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],2:] \ + = 10*feature_encode(triplet_sess, patch_list, len(patch_list)) + + # remove folder + shutil.rmtree(track_struct['file_path']['crop_det_folder']) + + # Forward tracking + if t_pointer==0 and num_bbox!=0: + track_struct['tracklet_mat']['obj_id_mat'][empty_idx[0:num_bbox]] = avai_ids[0:num_bbox] + track_struct['tracklet_mat']['track_id_mat'][empty_idx[0:num_bbox]] = np.array(range(num_bbox),dtype=int)+max_track_id+1 + track_struct['tracklet_mat']['xmin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1] + track_struct['tracklet_mat']['ymin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2] + track_struct['tracklet_mat']['xmax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1]+det_M[:,3] + track_struct['tracklet_mat']['ymax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2]+det_M[:,4] + track_struct['tracklet_mat']['x_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,6] + track_struct['tracklet_mat']['y_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,7] + track_struct['tracklet_mat']['w_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,8] + track_struct['tracklet_mat']['h_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,9] + track_struct['tracklet_mat']['det_score_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,5] + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],0] = t_pointer + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],1] = t_pointer + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],0] = empty_idx[0:num_bbox] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],1] = fr_idx + #track_struct['tracklet_mat']['class_name'][empty_idx[0:num_bbox],t_pointer] = det_M[:,10] + #track_struct['tracklet_mat']['dist2cam'][empty_idx[0:num_bbox],t_pointer] = det_M[:,11] + + elif t_pointer!=0 and num_bbox!=0: + #import pdb; pdb.set_trace() + prev_bbox_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][:,t_pointer-1]!=-1)[0] + prev_num_bbox = len(prev_bbox_idx) + if prev_num_bbox==0: + track_struct['tracklet_mat']['obj_id_mat'][empty_idx[0:num_bbox]] = avai_ids[0:num_bbox] + track_struct['tracklet_mat']['track_id_mat'][empty_idx[0:num_bbox]] = np.array(range(num_bbox),dtype=int)+max_track_id+1 + track_struct['tracklet_mat']['xmin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1] + track_struct['tracklet_mat']['ymin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2] + track_struct['tracklet_mat']['xmax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1]+det_M[:,3] + track_struct['tracklet_mat']['ymax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2]+det_M[:,4] + track_struct['tracklet_mat']['x_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,6] + track_struct['tracklet_mat']['y_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,7] + track_struct['tracklet_mat']['w_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,8] + track_struct['tracklet_mat']['h_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,9] + track_struct['tracklet_mat']['det_score_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,5] + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],0] = t_pointer + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],1] = t_pointer + 
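# bind the appearance features extracted above to the newly opened tracklets
+            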
track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],0] = empty_idx[0:num_bbox] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],1] = fr_idx + #track_struct['tracklet_mat']['class_name'][empty_idx[0:num_bbox],t_pointer] = det_M[:,10] + #track_struct['tracklet_mat']['dist2cam'][empty_idx[0:num_bbox],t_pointer] = det_M[:,11] + else: + # predict bbox location + bbox1 = np.zeros((prev_num_bbox,4)) + bbox1[:,0] = track_struct['tracklet_mat']['xmin_mat'][prev_bbox_idx,t_pointer-1] + bbox1[:,1] = track_struct['tracklet_mat']['ymin_mat'][prev_bbox_idx,t_pointer-1] + bbox1[:,2] = track_struct['tracklet_mat']['xmax_mat'][prev_bbox_idx,t_pointer-1] \ + -track_struct['tracklet_mat']['xmin_mat'][prev_bbox_idx,t_pointer-1]+1 + bbox1[:,3] = track_struct['tracklet_mat']['ymax_mat'][prev_bbox_idx,t_pointer-1] \ + -track_struct['tracklet_mat']['ymin_mat'][prev_bbox_idx,t_pointer-1]+1 + pred_bbox1 = np.zeros((prev_num_bbox,4)) + + bbox2 = np.zeros((num_bbox,4)) + bbox2[:,:] = det_M[:,1:5] + + # bbox association + for k in range(prev_num_bbox): + temp_track_id = prev_bbox_idx[k] + t_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][temp_track_id,:]!=-1)[0] + t_min = np.min(t_idx) + if t_mintrack_struct['track_params']['color_thresh']] = 0 + idx1, idx2 = track_lib.bbox_associate(overlap_mat, track_struct['track_params']['IOU_thresh']) + #if fr_idx==14: + #import pdb; pdb.set_trace() + + # assign the tracklet_mat + if len(idx1)==0: + track_struct['tracklet_mat']['obj_id_mat'][empty_idx[0:num_bbox]] = avai_ids[0:num_bbox] + track_struct['tracklet_mat']['track_id_mat'][empty_idx[0:num_bbox]] = np.array(range(num_bbox),dtype=int)+max_track_id+1 + track_struct['tracklet_mat']['xmin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1] + track_struct['tracklet_mat']['ymin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2] + track_struct['tracklet_mat']['xmax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1]+det_M[:,3] + track_struct['tracklet_mat']['ymax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2]+det_M[:,4] + track_struct['tracklet_mat']['x_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,6] + track_struct['tracklet_mat']['y_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,7] + track_struct['tracklet_mat']['w_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,8] + track_struct['tracklet_mat']['h_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,9] + track_struct['tracklet_mat']['det_score_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,5] + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],0] = t_pointer + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],1] = t_pointer + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],0] = empty_idx[0:num_bbox] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],1] = fr_idx + #track_struct['tracklet_mat']['class_name'][empty_idx[0:num_bbox],t_pointer] = det_M[:,10] + #track_struct['tracklet_mat']['dist2cam'][empty_idx[0:num_bbox],t_pointer] = det_M[:,11] + else: + cnt1 = -1 + cnt2 = -1 + for n in range(num_bbox): + if n not in list(idx2): + cnt1 = cnt1+1 + track_struct['tracklet_mat']['obj_id_mat'][empty_idx[cnt1]] \ + = avai_ids[cnt1] + track_struct['tracklet_mat']['track_id_mat'][empty_idx[cnt1]] \ + = cnt1+max_track_id+1 + track_struct['tracklet_mat']['xmin_mat'][empty_idx[cnt1],t_pointer] = det_M[n,1] + track_struct['tracklet_mat']['ymin_mat'][empty_idx[cnt1],t_pointer] = 
det_M[n,2] + track_struct['tracklet_mat']['xmax_mat'][empty_idx[cnt1],t_pointer] = det_M[n,1]+det_M[n,3] + track_struct['tracklet_mat']['ymax_mat'][empty_idx[cnt1],t_pointer] = det_M[n,2]+det_M[n,4] + track_struct['tracklet_mat']['x_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,6] + track_struct['tracklet_mat']['y_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,7] + track_struct['tracklet_mat']['w_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,8] + track_struct['tracklet_mat']['h_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,9] + track_struct['tracklet_mat']['det_score_mat'][empty_idx[cnt1],t_pointer] = det_M[n,5] + track_struct['tracklet_mat']['track_interval'][empty_idx[cnt1],0] = t_pointer + track_struct['tracklet_mat']['track_interval'][empty_idx[cnt1],1] = t_pointer + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],0] = empty_idx[cnt1] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],1] = fr_idx + #track_struct['tracklet_mat']['class_name'][empty_idx[cnt1],t_pointer] = det_M[n,10] + #track_struct['tracklet_mat']['dist2cam'][empty_idx[cnt1],t_pointer] = det_M[n,11] + else: + temp_idx = np.where(idx2==n)[0] + track_struct['tracklet_mat']['xmin_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,1] + track_struct['tracklet_mat']['ymin_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,2] + track_struct['tracklet_mat']['xmax_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,1]+det_M[n,3] + track_struct['tracklet_mat']['ymax_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,2]+det_M[n,4] + track_struct['tracklet_mat']['x_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,6] + track_struct['tracklet_mat']['y_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,7] + track_struct['tracklet_mat']['w_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,8] + track_struct['tracklet_mat']['h_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,9] + track_struct['tracklet_mat']['det_score_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,5] + track_struct['tracklet_mat']['track_interval'][prev_bbox_idx[idx1[temp_idx[0]]],1] = t_pointer + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],0] = prev_bbox_idx[idx1[temp_idx[0]]] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],1] = fr_idx + #track_struct['tracklet_mat']['class_name'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,10] + #track_struct['tracklet_mat']['dist2cam'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,11] + + track_struct['tracklet_mat']['assigned_obj_id_mask'] = track_struct['tracklet_mat']['save_obj_id_mask'].copy() + assigned_ids = track_struct['tracklet_mat']['obj_id_mat'][track_struct['tracklet_mat']['obj_id_mat']!=-1] + track_struct['tracklet_mat']['assigned_obj_id_mask'][assigned_ids] = 1 + avai_ids = np.where(track_struct['tracklet_mat']['assigned_obj_id_mask']==0)[0] + + # Tracklet clustering + + iters = 20 + if fr_idx%track_struct['track_params']['clustering_period']==track_struct['track_params']['clustering_period']-1 or end_flag==1: + for n in range(iters): + # print("iteration") + # print(n) + change_flag = tracklet_clustering(tracklet_sess, n) + if change_flag==0: + #import pdb; pdb.set_trace() + #time_check_flag = time_cluster_check() + break + + # Update tracklet + debug_mode = 0 + if fr_idx-track_struct['track_params']['num_fr']>190: + debug_mode = 1 + + print('-------') + print(debug_mode) + 
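# absolute frame index at the head of the sliding window
+    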
print(fr_idx-track_struct['track_params']['num_fr']) + print('-------') + post_processing(debug_mode) + + # for the last frame, save all the info to file + if end_flag==1: + for n in range(track_struct['tracklet_mat']['xmin_mat'].shape[1]): + + track_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][:,n]!=-1)[0] + num_save_id = len(track_idx) + if num_save_id!=0: + save_mat = np.zeros((num_save_id, 12)) + save_mat[:,0] = fr_idx-track_struct['track_params']['num_fr']+n+1 + save_mat[:,1] = track_struct['tracklet_mat']['obj_id_mat'][track_idx] + track_struct['tracklet_mat']['save_obj_id_mask'][save_mat[:,1].astype(int)] = 1 + save_mat[:,2] = track_struct['tracklet_mat']['track_id_mat'][track_idx] + save_mat[:,3] = track_struct['tracklet_mat']['xmin_mat'][track_idx,n] + save_mat[:,4] = track_struct['tracklet_mat']['ymin_mat'][track_idx,n] + save_mat[:,5] = track_struct['tracklet_mat']['xmax_mat'][track_idx,n] \ + -track_struct['tracklet_mat']['xmin_mat'][track_idx,n] + save_mat[:,6] = track_struct['tracklet_mat']['ymax_mat'][track_idx,n] \ + -track_struct['tracklet_mat']['ymin_mat'][track_idx,n] + save_mat[:,7] = track_struct['tracklet_mat']['x_3d_mat'][track_idx,n] + save_mat[:,8] = track_struct['tracklet_mat']['y_3d_mat'][track_idx,n] + save_mat[:,9] = track_struct['tracklet_mat']['w_3d_mat'][track_idx,n] + save_mat[:,10] = track_struct['tracklet_mat']['h_3d_mat'][track_idx,n] + save_mat[:,11] = track_struct['tracklet_mat']['det_score_mat'][track_idx,n] + #save_mat[:,12] = track_struct['tracklet_mat']['class_name'][track_idx,n] + #save_mat[:,13] = track_struct['tracklet_mat']['dist2cam'][track_idx,n] + f = open(track_struct['file_path']['txt_result_path'], 'a') + np.savetxt(f, save_mat, delimiter=',') + f.close() + wrt_missing_det(save_mat) + + #import pdb; pdb.set_trace() + t_pointer = prev_t_pointer + return + +def init_TC_tracker(): + global track_struct + + track_struct = {'track_params':{}, 'file_path':{}} + track_struct['file_path']['seq_name'] = '1' + track_struct['file_path']['img_name'] = '1' + track_struct['file_path']['sub_seq_name'] = '' + # track_struct['file_path']['det_path'] = 'D:/Data/KITTI/'+track_struct['file_path']['seq_name']+'/dets2.txt' + # track_struct['file_path']['img_folder'] = 'D:/Data/KITTI/'+track_struct['file_path']['img_name'] \ + # +track_struct['file_path']['sub_seq_name']+'/image_02/data' + # track_struct['file_path']['crop_det_folder'] = 'D:/Data/KITTI/temp_crop' + # track_struct['file_path']['triplet_model'] = 'D:/Data/UA-Detrac/UA_Detrac_model/KITTI_model' + # track_struct['file_path']['seq_model'] = 'D:/Data/UA-Detrac/KITTI_model/model.ckpt' + # track_struct['file_path']['tracking_img_folder'] = 'D:/Data/KITTI/tracking_img/'+track_struct['file_path']['seq_name'] \ + # +track_struct['file_path']['sub_seq_name'] + # track_struct['file_path']['tracking_video_path'] = 'D:/Data/KITTI/tracking_video/'+track_struct['file_path']['seq_name'] \ + # +track_struct['file_path']['sub_seq_name']+'.avi' + # track_struct['file_path']['txt_result_path'] = 'D:/Data/KITTI/txt_result/'+track_struct['file_path']['seq_name'] \ + # +track_struct['file_path']['sub_seq_name']+'.txt' + # if os.path.isfile(track_struct['file_path']['txt_result_path']): + # os.remove(track_struct['file_path']['txt_result_path']) + # track_struct['file_path']['track_struct_path'] = 'D:/Data/KITTI/track_struct/'+track_struct['file_path']['seq_name'] \ + # +track_struct['file_path']['sub_seq_name']+'.obj' + + + + track_struct['file_path']['det_path'] = 
'C:/Users/tangz/OneDrive/Documents/Gaoang/chongqing/det/1_new.txt' + track_struct['file_path']['img_folder'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/chongqing/img_folder/1' + track_struct['file_path']['crop_det_folder'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/chongqing/temp' + track_struct['file_path']['triplet_model'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/update_facenet/UA_Detrac_model/MOT' + track_struct['file_path']['seq_model'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/MOT17/MOT_2d_v2/model.ckpt' + track_struct['file_path']['tracking_img_folder'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/chongqing/tracking_img/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name'] + track_struct['file_path']['tracking_video_path'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/chongqing/tracking_video/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name']+'.avi' + track_struct['file_path']['txt_result_path'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/chongqing/txt_result/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name']+'.txt' + + if os.path.isfile(track_struct['file_path']['txt_result_path']): + os.remove(track_struct['file_path']['txt_result_path']) + + track_struct['file_path']['track_struct_path'] = 'C:/Users/tangz/OneDrive/Documents/Gaoang/chongqing/appear_mat/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name']+'.obj' + + + track_struct['track_params']['num_fr'] = 64 + track_struct['track_params']['num_track'] = 1000 + track_struct['track_params']['num_max_det'] = 10000 + track_struct['track_params']['max_num_obj'] = 10000 + track_struct['track_params']['IOU_thresh'] = 0.5#0.3 + track_struct['track_params']['color_thresh'] = 5 + track_struct['track_params']['det_thresh'] = -2 + track_struct['track_params']['linear_pred_thresh'] = 5 + track_struct['track_params']['t_dist_thresh'] = 60 + track_struct['track_params']['track_overlap_thresh'] = 0 + track_struct['track_params']['search_radius'] = 1 + track_struct['track_params']['const_fr_thresh'] = 1 + track_struct['track_params']['crop_size'] = 182 + track_struct['track_params']['loc_scales'] = [1352,700,1352,700]#[100,30,5,5] + track_struct['track_params']['clustering_period'] = 20 + track_struct['track_params']['time_cluster_dist'] = 100 + track_struct['track_params']['file_name_len'] = 6 + track_struct['track_params']['num_time_cluster'] \ + = int(np.ceil(track_struct['track_params']['num_fr']/track_struct['track_params']['time_cluster_dist'])) + + track_struct['track_params']['max_length'] = 64 + track_struct['track_params']['feature_size'] = 4+512 + track_struct['track_params']['batch_size'] = 64 + track_struct['track_params']['num_classes'] = 2 + + + track_struct['tracklet_mat'] = {'track_id_mat':[], 'xmin_mat':[], 'ymin_mat':[], 'xmax_mat':[], 'ymax_mat':[], 'x_3d_mat':[], + 'y_3d_mat':[], 'w_3d_mat':[], 'h_3d_mat':[], 'det_score_mat':[], 'track_interval':[], + 'obj_id_mat':[], 'appearance_fea_mat':[]} + + track_struct['tracklet_mat']['track_id_mat'] = -np.ones(track_struct['track_params']['num_track'], dtype=int) + track_struct['tracklet_mat']['obj_id_mat'] = -np.ones(track_struct['track_params']['num_track'], dtype=int) + track_struct['tracklet_mat']['xmin_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymin_mat'] = -np.ones((track_struct['track_params']['num_track'], + 
track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['xmax_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymax_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['x_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['y_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['w_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['h_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['det_score_mat'] = \ + -np.ones((track_struct['track_params']['num_track'],track_struct['track_params']['num_fr'])) + + #track_struct['tracklet_mat']['class_name'] = -np.ones((track_struct['track_params']['num_track'], + # track_struct['track_params']['num_fr'])) + #track_struct['tracklet_mat']['dist2cam'] = -np.ones((track_struct['track_params']['num_track'], + # track_struct['track_params']['num_fr'])) + + track_struct['tracklet_mat']['track_interval'] = -np.ones((track_struct['track_params']['num_track'],2), dtype=int) + track_struct['tracklet_mat']['prev_track_interval'] = -np.ones((track_struct['track_params']['num_track'],2), dtype=int) + track_struct['tracklet_mat']['appearance_fea_mat'] = -np.ones((track_struct['track_params']['num_max_det'], + track_struct['track_params']['feature_size']-4+2)) + + track_struct['tracklet_mat']['comb_track_cost'] = np.zeros((track_struct['track_params']['num_track'], + track_struct['track_params']['num_track'])) + track_struct['tracklet_mat']['comb_track_cost_mask'] = np.zeros((track_struct['track_params']['num_track'], + track_struct['track_params']['num_track']),dtype=int) + track_struct['tracklet_mat']['save_obj_id_mask'] = np.zeros(track_struct['track_params']['max_num_obj'],dtype=int) + track_struct['tracklet_mat']['assigned_obj_id_mask'] = np.zeros(track_struct['track_params']['max_num_obj'],dtype=int) + track_struct['tracklet_mat']['imgs'] = [] + track_struct['tracklet_mat']['color_table'] = track_lib.color_table(track_struct['track_params']['max_num_obj']) + + # fr_id, track_id, xmin, ymin, xmax, ymax, x, y, w, h, det_score + track_struct['tracklet_mat']['obj_end_fr_info'] = -np.ones((track_struct['track_params']['max_num_obj'],11)) + + # remove folder + if os.path.isdir(track_struct['file_path']['crop_det_folder']): + shutil.rmtree(track_struct['file_path']['crop_det_folder']) + + return + +def TC_tracker(): + global track_struct + init_TC_tracker() + + # initialize triplet model + global triplet_graph + global triplet_sess + init_triplet_model() + + # initialize tracklet model + global tracklet_graph + global tracklet_sess + init_tracklet_model() + + M = track_lib.load_detection(track_struct['file_path']['det_path'], 'chongqing') + total_num_fr = int(M[-1,0]+1) + + t_pointer = 0 + for n in range(total_num_fr): + print("Frame %d" % n) + # print("t_pointer %d" % t_pointer) + fr_idx = n + idx = np.where(np.logical_and(M[:,0]==fr_idx,M[:,5]>track_struct['track_params']['det_thresh']))[0] + if len(idx)>1: + choose_idx, _ = track_lib.merge_bbox(M[idx,1:5], 0.7, M[idx,5],1) + #import pdb; pdb.set_trace() + temp_M = M[idx[choose_idx],:] + else: + 
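# zero or one detection passed the score threshold: nothing to merge
+            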
temp_M = M[idx,:] + + img_name = track_lib.file_name(fr_idx+1,track_struct['track_params']['file_name_len'])+'.jpg' + img_path = track_struct['file_path']['img_folder']+'/'+img_name + img = misc.imread(img_path) + + if fr_idx==total_num_fr-1: + end_flag = 1 + else: + end_flag = 0 + + TC_online(temp_M, img, t_pointer, fr_idx, end_flag) + t_pointer = t_pointer+1 + if t_pointer>track_struct['track_params']['num_fr']: + t_pointer = track_struct['track_params']['num_fr'] + + + # draw all results + M = np.loadtxt(track_struct['file_path']['txt_result_path'], delimiter=',') + M = np.array(M) + for n in range(total_num_fr): + fr_idx = n + img_name = track_lib.file_name(fr_idx+1,track_struct['track_params']['file_name_len'])+'.jpg' + img_path = track_struct['file_path']['img_folder']+'/'+img_name + img = misc.imread(img_path) + + temp_M = M[M[:,0]==fr_idx,:] + draw_result(img, temp_M, fr_idx) + + convert_frames_to_video(track_struct['file_path']['tracking_img_folder']+'/', track_struct['file_path']['tracking_video_path'], 30) + + return track_struct + +if __name__ == '__main__': + + TC_tracker() + diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_utils_3c.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_utils_3c.py new file mode 100644 index 0000000000000000000000000000000000000000..e3d49442be4043f50d58f35819b7489380b805c6 --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/tracklet_utils_3c.py @@ -0,0 +1,3151 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +/* + * Copyright ©2019 Gaoang Wang. All rights reserved. Permission is + * hereby granted for academic use. No other use, copying, distribution, or modification + * is permitted without prior written consent. Copyrights for + * third-party components of this work must be honored. Instructors + * interested in reusing these course materials should contact the + * author. 
+ */ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import numpy as np +import argparse +import facenet +import lfw +import os +import sys +import cv2 +import pickle +import time +from functools import wraps + +from tensorflow.python.ops import data_flow_ops +from sklearn import metrics +from scipy.optimize import brentq +from scipy import interpolate +from scipy.interpolate import interp1d +from scipy.io import loadmat +from skimage.io import imread +from scipy import misc +from scipy import stats +from scipy import spatial +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from PIL import Image +from sklearn import svm +from sklearn.externals import joblib +from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import make_classification +from collections import Counter + +import seq_nn_3d_v2 +import track_lib + + +# Set paths +seq_name = 'basketball_6943' +img_name = 'basketball_6943' +sub_seq_name = '' +file_len = 8 + +det_path = 'D:/Data/basketball_6943_results/det.txt' +gt_path = '' +img_folder = 'D:/Data/basketball_6943_results/img' +crop_det_folder = 'D:/Data/basketball_6943_results/crop_det/'+seq_name+sub_seq_name +triplet_model = 'D:/Data/UA-Detrac/UA_Detrac_model/MOT' +seq_model = 'D:/Data/MOT/MOT_2d_v2/model.ckpt' +tracking_img_folder = 'D:/Data/basketball_6943_results/tracking_img/'+seq_name+sub_seq_name +tracking_video_path = 'D:/Data/basketball_6943_results/tracking_video/'+seq_name+sub_seq_name+'.avi' +appear_mat_path = 'D:/Data/basketball_6943_results/appear_mat/'+seq_name+'.obj' +txt_result_path = 'D:/Data/basketball_6943_results/txt_result/'+seq_name+sub_seq_name+'.txt' +track_struct_path = 'D:/Data/basketball_6943_results/track_struct/'+seq_name+sub_seq_name+'.obj' + +''' +seq_name = 'MOT17-02-FRCNN' +img_name = 'MOT17-02' +sub_seq_name = '' +det_path = 'D:/Data/MOT/MOT17Labels/train/'+seq_name+'/det/det.txt' +gt_path = 'D:/Data/MOT/MOT17Labels/train/'+seq_name+'/gt/gt.txt' +img_folder = 'D:/Data/MOT/MOT17Det/train/'+img_name+sub_seq_name+'/img1' +crop_det_folder = 'D:/Data/MOT/crop_det/'+seq_name+sub_seq_name +triplet_model = 'D:/Data/UA-Detrac/UA_Detrac_model/MOT' +#triplet_model = 'D:/Data/UA-Detrac/UA_Detrac_model/KITTI_model' +#seq_model = 'D:/Data/UA-Detrac/cnn_appear_model_517_128_16600steps/model.ckpt' +#seq_model = 'D:/Data/UA-Detrac/cnn_MOT/model.ckpt' +seq_model = 'D:/Data/UA-Detrac/MOT_2d_v2/model.ckpt' +#seq_model = 'D:/Data/UA-Detrac/semi_train_model/model.ckpt' +tracking_img_folder = 'D:/Data/MOT/tracking_img/'+seq_name+sub_seq_name +tracking_video_path = 'D:/Data/MOT/tracking_video/'+seq_name+sub_seq_name+'.avi' +svm_model_path = 'D:/Data/MOT/MOT17_train_det_crop/cnn_svm_MOT17.pkl' +rand_forest_model_path = 'D:/Data/MOT/MOT17_train_det_crop/rand_forest_MOT17_FRCNN.pkl' +F_path = 'D:/Data/MOT/geometry_info/'+img_name+'_F_set.mat' + +appear_mat_path = 'D:/Data/MOT/appear_mat/'+seq_name+'.obj' + +save_fea_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'.obj' +save_label_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_label.obj' +save_remove_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_remove_set.obj' +save_all_fea_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_all.obj' +save_all_label_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_all_label.obj' + +save_all_label_path1 = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_all_label0.obj' +save_all_label_path2 = 
'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_all_label1.obj' +save_all_label_path3 = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_all_label2.obj' +save_all_label_path4 = 'D:/Data/MOT/save_fea_mat/'+seq_name+sub_seq_name+'_all_label3.obj' + +txt_result_path = 'D:/Data/MOT/txt_result/'+seq_name+sub_seq_name+'.txt' +track_struct_path = 'D:/Data/MOT/track_struct/'+seq_name+sub_seq_name+'.obj' +''' + +max_length = 64 +feature_size = 4+512 +batch_size = 64 +num_classes = 2 + + +track_set = [] +remove_set = [] + + +#track_set = pickle.load(open(save_label_path,'rb')) +#remove_set = pickle.load(open(save_remove_path,'rb')) + + +#save_fea_mat = np.zeros((len(track_set),feature_size,max_length,2)) + + +global all_fea_mat +global all_fea_label +all_fea_mat = np.zeros((10000,feature_size,max_length,3)) +all_fea_label = np.zeros((10000,4)) + + + + +def get_tracklet_scores(): + global track_struct + + # svm score + track_struct['tracklet_mat']['svm_score_mat'] = -1*np.ones((track_struct['tracklet_mat']['xmin_mat'].shape[0], \ + track_struct['tracklet_mat']['xmin_mat'].shape[1])) + num_det = track_struct['tracklet_mat']['appearance_fea_mat'].shape[0] + clf = joblib.load(svm_model_path) + pred_s = np.zeros((num_det,1)) + pred_s[:,0] = clf.decision_function(track_struct['tracklet_mat']['appearance_fea_mat'][:,2:]) + for n in range(num_det): + track_struct['tracklet_mat']['svm_score_mat'][int(track_struct['tracklet_mat']['appearance_fea_mat'][n,0])-1, \ + int(track_struct['tracklet_mat']['appearance_fea_mat'][n,1])-1] = pred_s[n,0] + + # h_score and y_score + track_struct['tracklet_mat']['h_score_mat'] = -1*np.ones((track_struct['tracklet_mat']['xmin_mat'].shape[0], \ + track_struct['tracklet_mat']['xmin_mat'].shape[1])) + track_struct['tracklet_mat']['y_score_mat'] = -1*np.ones((track_struct['tracklet_mat']['xmin_mat'].shape[0], \ + track_struct['tracklet_mat']['xmin_mat'].shape[1])) + hloc = np.zeros(num_det) + yloc = np.zeros(num_det) + cnt = 0 + for n in range(track_struct['tracklet_mat']['xmin_mat'].shape[0]): + idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n,:]!=-1)[0] + hloc[cnt:cnt+len(idx)] = track_struct['tracklet_mat']['ymax_mat'][n,idx]-track_struct['tracklet_mat']['ymin_mat'][n,idx] + yloc[cnt:cnt+len(idx)] = track_struct['tracklet_mat']['ymax_mat'][n,idx] + cnt = cnt+len(idx) + + ph, py = track_lib.estimate_h_y(hloc, yloc) + + A = np.ones((hloc.shape[0],2)) + A[:,0] = yloc + y_err = (np.matmul(A,ph)-hloc)/hloc + err_std = np.std(y_err) + h_score = np.zeros((y_err.shape[0],1)) + h_score[:,0] = np.exp(-np.power(y_err,2)/(err_std*err_std)) + + A = np.ones((hloc.shape[0],2)) + A[:,0] = hloc + y_err = np.matmul(A,py)-yloc + err_std = np.std(y_err) + y_score = np.zeros((y_err.shape[0],1)) + y_score[:,0] = np.exp(-np.power(y_err,2)/(err_std*err_std)) + #import pdb; pdb.set_trace() + + cnt = 0 + for n in range(track_struct['tracklet_mat']['xmin_mat'].shape[0]): + idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n,:]!=-1)[0] + track_struct['tracklet_mat']['h_score_mat'][n,idx] = h_score[cnt:cnt+len(idx),0] + track_struct['tracklet_mat']['y_score_mat'][n,idx] = y_score[cnt:cnt+len(idx),0] + cnt = cnt+len(idx) + return + +def remove_tracklet(tracklet_mat): + num_tracklet = tracklet_mat['xmin_mat'].shape[0] + tracklet_fea = np.zeros((num_tracklet,17)) + for n in range(num_tracklet): + idx = np.where(tracklet_mat['xmin_mat'][n,:]!=-1)[0] + tracklet_fea[n,:] = track_lib.extract_tracklet_feature(tracklet_mat, n, idx) + clf = joblib.load(rand_forest_model_path) + pred_label = 
clf.predict(tracklet_fea) + temp_remove_set = np.where(pred_label!=1)[0] + temp_remove_set = list(temp_remove_set) + #import pdb; pdb.set_trace() + return temp_remove_set + +def preprocessing(tracklet_mat, len_thresh, track_params): + new_tracklet_mat = tracklet_mat + N_tracklet = new_tracklet_mat['xmin_mat'].shape[0] + remove_idx = [] + for n in range(N_tracklet): + idx = np.where(new_tracklet_mat['xmin_mat'][n,:]!=-1)[0] + max_det_score = np.max(new_tracklet_mat['det_score_mat'][n,idx]) + if len(idx)0: + num1 = bbox1.shape[0] + else: + num1 = 0 + if len(bbox2)>0: + num2 = bbox2.shape[0] + else: + num2 = 0 + + new_track_id1 = track_id1 + new_tracklet_mat = tracklet_mat + if fr_idx2==2 and num1>0: + new_track_id1 = list(range(1,num1+1)) + ''' + new_tracklet_mat['xmin_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['ymin_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['xmax_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['ymax_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['det_score_mat'] = -np.ones((num1, num_fr)) + ''' + new_tracklet_mat['xmin_mat'][0:num1,0] = bbox1[:,0] + new_tracklet_mat['ymin_mat'][0:num1,0] = bbox1[:,1] + new_tracklet_mat['xmax_mat'][0:num1,0] = bbox1[:,0]+bbox1[:,2]-1 + new_tracklet_mat['ymax_mat'][0:num1,0] = bbox1[:,1]+bbox1[:,3]-1 + new_tracklet_mat['det_score_mat'][0:num1,0] = det_score1 + if track_params['svm_score_flag']==1: + new_tracklet_mat['svm_score_mat'][0:num1,0] = svm_score1 + if track_params['h_score_flag']==1: + new_tracklet_mat['h_score_mat'][0:num1,0] = h_score1 + if track_params['y_score_flag']==1: + new_tracklet_mat['y_score_mat'][0:num1,0] = y_score1 + if track_params['IOU_gt_flag']==1: + new_tracklet_mat['IOU_gt_mat'][0:num1,0] = IOU_gt1 + max_id = num1 + + if len(bbox1)==0 and len(bbox2)!=0: + idx1 = [] + idx2 = [] + elif len(bbox1)!=0 and len(bbox2)==0: + idx1 = [] + idx2 = [] + elif len(bbox1)==0 and len(bbox2)==0: + idx1 = [] + idx2 = [] + elif len(bbox1)!=0 and len(bbox2)!=0: + # pred bbox1 + pred_bbox1 = np.zeros((len(bbox1),4)) + if track_params['use_F']==1: + pred_bbox1 = track_lib.pred_bbox_by_F(bbox1, tracklet_mat['F'][:,:,fr_idx2-2], 0, [], []) + else: + for k in range(len(bbox1)): + temp_track_id = new_track_id1[k]-1 + t_idx = np.where(new_tracklet_mat['xmin_mat'][temp_track_id,:]!=-1)[0] + if len(t_idx)==0: + import pdb; pdb.set_trace() + t_min = np.min(t_idx) + if t_mincolor_thresh] = 0 + idx1, idx2 = track_lib.bbox_associate(overlap_mat, track_params['IOU_thresh']) + + # check tracklet generation + if len(M_gt)>0: + M1 = M_gt[M_gt[:,0]==fr_idx2-1,:] + M2 = M_gt[M_gt[:,0]==fr_idx2,:] + real_id1 = -np.ones(len(bbox1)) + real_id2 = -np.ones(len(bbox2)) + overlap_mat1,_,_,_ = track_lib.get_overlap(bbox1, M1[:,1:5]) + r_idx1, r_idx2 = track_lib.bbox_associate(overlap_mat1, 0.5) + if len(r_idx1)!=0: + real_id1[r_idx1] = M1[r_idx2,6] + overlap_mat2,_,_,_ = track_lib.get_overlap(bbox2, M2[:,1:5]) + r_idx1, r_idx2 = track_lib.bbox_associate(overlap_mat2, 0.5) + if len(r_idx1)!=0: + real_id2[r_idx1] = M2[r_idx2,6] + for k1 in range(len(idx1)): + if real_id1[idx1[k1]]==real_id2[idx2[k1]] and real_id1[idx1[k1]]!=-1: + new_tracklet_mat['conf_matrix_tracklet'][0,0] = new_tracklet_mat['conf_matrix_tracklet'][0,0]+1 + elif real_id1[idx1[k1]]!=real_id2[idx2[k1]]: + new_tracklet_mat['conf_matrix_tracklet'][0,1] = new_tracklet_mat['conf_matrix_tracklet'][0,1]+1 + for k1 in range(len(bbox1)): + if k1 not in idx1: + if real_id1[k1]!=-1 and real_id1[k1] in real_id2: + new_tracklet_mat['conf_matrix_tracklet'][1,0] = 
new_tracklet_mat['conf_matrix_tracklet'][1,0]+1 + + + if len(idx1)==0 and num2>0: + new_track_id2 = list(np.array(range(1,num2+1))+max_id) + ''' + new_tracklet_mat['xmin_mat'] = \ + np.append(new_tracklet_mat['xmin_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['ymin_mat'] = \ + np.append(new_tracklet_mat['ymin_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['xmax_mat'] = \ + np.append(new_tracklet_mat['xmax_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['ymax_mat'] = \ + np.append(new_tracklet_mat['ymax_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['det_score_mat'] = \ + np.append(new_tracklet_mat['det_score_mat'], -np.ones((num2,num_fr)), axis=0) + ''' + max_id = max_id+num2 + new_tracklet_mat['xmin_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,0] + new_tracklet_mat['ymin_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,1] + new_tracklet_mat['xmax_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,0]+bbox2[:,2]-1 + new_tracklet_mat['ymax_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,1]+bbox2[:,3]-1 + new_tracklet_mat['det_score_mat'][max_id-num2:max_id,fr_idx2-1] = det_score2 + if track_params['svm_score_flag']==1: + new_tracklet_mat['svm_score_mat'][max_id-num2:max_id,fr_idx2-1] = svm_score2 + if track_params['h_score_flag']==1: + new_tracklet_mat['h_score_mat'][max_id-num2:max_id,fr_idx2-1] = h_score2 + if track_params['y_score_flag']==1: + new_tracklet_mat['y_score_mat'][max_id-num2:max_id,fr_idx2-1] = y_score2 + if track_params['IOU_gt_flag']==1: + new_tracklet_mat['IOU_gt_mat'][max_id-num2:max_id,fr_idx2-1] = IOU_gt2 + elif len(idx1)>0: + new_track_id2 = [] + for n in range(num2): + #import pdb; pdb.set_trace() + temp_idx = np.where(idx2==n)[0] + if len(temp_idx)==0: + max_id = max_id+1 + new_track_id2.append(max_id) + ''' + new_tracklet_mat['xmin_mat'] = \ + np.append(new_tracklet_mat['xmin_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['ymin_mat'] = \ + np.append(new_tracklet_mat['ymin_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['xmax_mat'] = \ + np.append(new_tracklet_mat['xmax_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['ymax_mat'] = \ + np.append(new_tracklet_mat['ymax_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['det_score_mat'] = \ + np.append(new_tracklet_mat['det_score_mat'], -np.ones((1,num_fr)), axis=0) + ''' + #if fr_idx2==20: + # import pdb; pdb.set_trace() + new_tracklet_mat['xmin_mat'][max_id-1,fr_idx2-1] = bbox2[n,0] + new_tracklet_mat['ymin_mat'][max_id-1,fr_idx2-1] = bbox2[n,1] + new_tracklet_mat['xmax_mat'][max_id-1,fr_idx2-1] = bbox2[n,0]+bbox2[n,2]-1 + new_tracklet_mat['ymax_mat'][max_id-1,fr_idx2-1] = bbox2[n,1]+bbox2[n,3]-1 + new_tracklet_mat['det_score_mat'][max_id-1,fr_idx2-1] = det_score2[n] + if track_params['svm_score_flag']==1: + new_tracklet_mat['svm_score_mat'][max_id-1,fr_idx2-1] = svm_score2[n] + if track_params['h_score_flag']==1: + new_tracklet_mat['h_score_mat'][max_id-1,fr_idx2-1] = h_score2[n] + if track_params['y_score_flag']==1: + new_tracklet_mat['y_score_mat'][max_id-1,fr_idx2-1] = y_score2[n] + if track_params['IOU_gt_flag']==1: + new_tracklet_mat['IOU_gt_mat'][max_id-1,fr_idx2-1] = IOU_gt2[n] + else: + temp_idx = temp_idx[0] + new_track_id2.append(new_track_id1[idx1[temp_idx]]) + new_tracklet_mat['xmin_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox2[n,0] + new_tracklet_mat['ymin_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox2[n,1] + new_tracklet_mat['xmax_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = 
bbox2[n,0]+bbox2[n,2]-1 + new_tracklet_mat['ymax_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox2[n,1]+bbox2[n,3]-1 + new_tracklet_mat['det_score_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = det_score2[n] + if track_params['svm_score_flag']==1: + new_tracklet_mat['svm_score_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = svm_score2[n] + if track_params['h_score_flag']==1: + new_tracklet_mat['h_score_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = h_score2[n] + if track_params['y_score_flag']==1: + new_tracklet_mat['y_score_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = y_score2[n] + if track_params['IOU_gt_flag']==1: + new_tracklet_mat['IOU_gt_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = IOU_gt2[n] + else: + new_track_id2 = [] + + #if fr_idx2==20: + # import pdb; pdb.set_trace() + new_max_id = max_id + return new_tracklet_mat, new_track_id1, new_track_id2, new_max_id + +def init_clustering(): + + global track_struct + + N_tracklet = track_struct['tracklet_mat']['xmin_mat'].shape[0] + + # track interval + track_struct['tracklet_mat']['track_interval'] = np.zeros((N_tracklet, 2)) + + # track cluster + track_struct['tracklet_mat']['track_cluster'] = [] + + # track class + track_struct['tracklet_mat']['track_class'] = np.arange(N_tracklet, dtype=int) + + # time cluster + track_struct['tracklet_mat']['time_cluster'] = [] + for n in range(track_struct['track_params']['num_time_cluster']): + track_struct['tracklet_mat']['time_cluster'].append([]) + + track_struct['tracklet_mat']['track_cluster_t_idx'] = [] + for n in range(N_tracklet): + idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n,:]!=-1)[0] + track_struct['tracklet_mat']['track_interval'][n,0] = np.min(idx) + track_struct['tracklet_mat']['track_interval'][n,1] = np.max(idx) + track_struct['tracklet_mat']['track_cluster'].append([n]) + + if n in remove_set: + track_struct['tracklet_mat']['track_cluster_t_idx'].append([-1]) + else: + min_time_cluster_idx = int(np.floor(max(track_struct['tracklet_mat']['track_interval'][n,0] + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(track_struct['tracklet_mat']['track_interval'][n,1] + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + track_struct['tracklet_mat']['track_cluster_t_idx'].append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + for k in range(min_time_cluster_idx,max_time_cluster_idx+1): + track_struct['tracklet_mat']['time_cluster'][k].append(n) + + # get center position of each detection location + mask = track_struct['tracklet_mat']['xmin_mat']==-1 + track_struct['tracklet_mat']['center_x'] = \ + (track_struct['tracklet_mat']['xmin_mat']+track_struct['tracklet_mat']['xmax_mat'])/2 + track_struct['tracklet_mat']['center_y'] = \ + (track_struct['tracklet_mat']['ymin_mat']+track_struct['tracklet_mat']['ymax_mat'])/2 + track_struct['tracklet_mat']['w'] = \ + track_struct['tracklet_mat']['xmax_mat']-track_struct['tracklet_mat']['xmin_mat']+1 + track_struct['tracklet_mat']['h'] = \ + track_struct['tracklet_mat']['ymax_mat']-track_struct['tracklet_mat']['ymin_mat']+1 + track_struct['tracklet_mat']['center_x'][mask] = -1 + track_struct['tracklet_mat']['center_y'][mask] = -1 + track_struct['tracklet_mat']['w'][mask] = -1 + track_struct['tracklet_mat']['h'][mask] = -1 + + # neighbor_track_idx and 
conflict_track_idx + track_struct['tracklet_mat']['neighbor_track_idx'] = [] + track_struct['tracklet_mat']['conflict_track_idx'] = [] + for n in range(N_tracklet): + track_struct['tracklet_mat']['neighbor_track_idx'].append([]) + track_struct['tracklet_mat']['conflict_track_idx'].append([]) + for n in range(N_tracklet-1): + for m in range(n+1, N_tracklet): + t_min1 = track_struct['tracklet_mat']['track_interval'][n,0] + t_max1 = track_struct['tracklet_mat']['track_interval'][n,1] + t_min2 = track_struct['tracklet_mat']['track_interval'][m,0] + t_max2 = track_struct['tracklet_mat']['track_interval'][m,1] + overlap_len = min(t_max2,t_max1)-max(t_min1,t_min2)+1 + overlap_r = overlap_len/(t_max1-t_min1+1+t_max2-t_min2+1-overlap_len) + if overlap_len>0 and overlap_r>track_struct['track_params']['track_overlap_thresh']: + track_struct['tracklet_mat']['conflict_track_idx'][n].append(m) + track_struct['tracklet_mat']['conflict_track_idx'][m].append(n) + continue + if overlap_len>0 and overlap_r<=track_struct['track_params']['track_overlap_thresh']: + # check the search region + t1 = int(max(t_min1,t_min2)) + t2 = int(min(t_max2,t_max1)) + if (t_min1<=t_min2 and t_max1>=t_max2) or (t_min1>=t_min2 and t_max1<=t_max2) or overlap_len>4: + track_struct['tracklet_mat']['conflict_track_idx'][n].append(m) + track_struct['tracklet_mat']['conflict_track_idx'][m].append(n) + continue + + cand_t = np.array(range(t1,t2+1)) + dist_x = abs(track_struct['tracklet_mat']['center_x'][n,cand_t] \ + -track_struct['tracklet_mat']['center_x'][m,cand_t]) + dist_y = abs(track_struct['tracklet_mat']['center_y'][n,cand_t] \ + -track_struct['tracklet_mat']['center_y'][m,cand_t]) + w1 = track_struct['tracklet_mat']['w'][n,cand_t] + h1 = track_struct['tracklet_mat']['h'][n,cand_t] + w2 = track_struct['tracklet_mat']['w'][m,cand_t] + h2 = track_struct['tracklet_mat']['h'][m,cand_t] + + min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)]) + min_dist_x1 = np.min(dist_x/min_len) + min_dist_y1 = np.min(dist_y/min_len) + min_dist_x2 = np.min(dist_x/min_len) + min_dist_y2 = np.min(dist_y/min_len) + if min_dist_x1=t_max2: + t1 = int(t_min1) + t2 = int(t_max2) + else: + t1 = int(t_max1) + t2 = int(t_min2) + + #*********************************** + tr_t1 = np.array(range(int(t_min1),int(t_max1+1))) + tr_x1 = track_struct['tracklet_mat']['center_x'][n,int(t_min1):int(t_max1+1)] + tr_y1 = track_struct['tracklet_mat']['center_y'][n,int(t_min1):int(t_max1+1)] + if len(tr_t1)>10: + if t_min1>=t_max2: + tr_t1 = tr_t1[0:10] + tr_x1 = tr_x1[0:10] + tr_y1 = tr_y1[0:10] + else: + tr_t1 = tr_t1[-10:] + tr_x1 = tr_x1[-10:] + tr_y1 = tr_y1[-10:] + ts_x1 = track_lib.linear_pred_v2(tr_t1, tr_x1, np.array([t2])) + ts_y1 = track_lib.linear_pred_v2(tr_t1, tr_y1, np.array([t2])) + dist_x1 = abs(ts_x1[0]-track_struct['tracklet_mat']['center_x'][m,t2]) + dist_y1 = abs(ts_y1[0]-track_struct['tracklet_mat']['center_y'][m,t2]) + + tr_t2 = np.array(range(int(t_min2),int(t_max2+1))) + tr_x2 = track_struct['tracklet_mat']['center_x'][m,int(t_min2):int(t_max2+1)] + tr_y2 = track_struct['tracklet_mat']['center_y'][m,int(t_min2):int(t_max2+1)] + if len(tr_t2)>10: + if t_min2>t_max1: + tr_t2 = tr_t2[0:10] + tr_x2 = tr_x2[0:10] + tr_y2 = tr_y2[0:10] + else: + tr_t2 = tr_t2[-10:] + tr_x2 = tr_x2[-10:] + tr_y2 = tr_y2[-10:] + + ts_x2 = track_lib.linear_pred_v2(tr_t2, tr_x2, np.array([t1])) + ts_y2 = track_lib.linear_pred_v2(tr_t2, tr_y2, np.array([t1])) + dist_x2 = abs(ts_x2[0]-track_struct['tracklet_mat']['center_x'][n,t1]) + dist_y2 = 
+            dist_x = min(dist_x1, dist_x2)
+            dist_y = min(dist_y1, dist_y2)
+            #***********************************
+
+            #import pdb; pdb.set_trace()
+            '''
+            dist_x = abs(track_struct['tracklet_mat']['center_x'][n,t1] \
+                -track_struct['tracklet_mat']['center_x'][m,t2])
+            dist_y = abs(track_struct['tracklet_mat']['center_y'][n,t1] \
+                -track_struct['tracklet_mat']['center_y'][m,t2])
+            '''
+
+            w1 = track_struct['tracklet_mat']['w'][n,t1]
+            h1 = track_struct['tracklet_mat']['h'][n,t1]
+            w2 = track_struct['tracklet_mat']['w'][m,t2]
+            h2 = track_struct['tracklet_mat']['h'][m,t2]
+
+            min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)])
+            min_dist_x1 = dist_x/min_len
+            min_dist_y1 = dist_y/min_len
+            min_dist_x2 = dist_x/min_len
+            min_dist_y2 = dist_y/min_len
+
+            # temporal and spatial gate for non-overlapping tracklets
+            # (threshold keys 't_dist_thresh' and 'search_radius' assumed here)
+            if abs(t1-t2)<=track_struct['track_params']['t_dist_thresh'] and \
+                min_dist_x1<track_struct['track_params']['search_radius'] and \
+                min_dist_y1<track_struct['track_params']['search_radius'] and \
+                min_dist_x2<track_struct['track_params']['search_radius'] and \
+                min_dist_y2<track_struct['track_params']['search_radius']:
+                track_struct['tracklet_mat']['neighbor_track_idx'][n].append(m)
+                track_struct['tracklet_mat']['neighbor_track_idx'][m].append(n)
+
+    # enforce the labeled pairs in the global track_set: a positive pair must be
+    # neighbors (and not in conflict); a negative pair is forced into conflict
+    # (pairs more than 60 frames apart are ignored)
+    for n in range(len(track_set)):
+        if track_set[n,2]==1:
+            if track_struct['tracklet_mat']['track_interval'][track_set[n,1],0] \
+                -track_struct['tracklet_mat']['track_interval'][track_set[n,0],1]>60:
+                continue
+
+            if track_set[n,0] not in track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]]:
+                track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]].append(track_set[n,0])
+                track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,0]].append(track_set[n,1])
+            if track_set[n,0] in track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]]:
+                track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]].remove(track_set[n,0])
+                track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,0]].remove(track_set[n,1])
+
+        else:
+            if track_set[n,0] in track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]]:
+                track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]].remove(track_set[n,0])
+                track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,0]].remove(track_set[n,1])
+            if track_set[n,0] not in track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]]:
+                track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]].append(track_set[n,0])
+                track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,0]].append(track_set[n,1])
+
+
+    # cluster cost
+    track_struct['tracklet_mat']['cluster_cost'] = []
+    for n in range(N_tracklet):
+        #track_struct['tracklet_mat']['cluster_cost'].append(0)
+        # bias term
+        #***************************************
+        track_struct['tracklet_mat']['cluster_cost'].append(track_struct['track_params']['cost_bias'])
+
+    # save all comb cost for two tracklets
+    # comb_track_cost [track_id1, track_id2, cost]
+    # track_struct['tracklet_mat']['comb_track_cost'] = []
+
+    # save feature mat for training
+    '''
+    if len(track_struct['tracklet_mat']['track_set'])>0:
+        track_struct['tracklet_mat']['save_fea_mat'] = np.zeros((len(track_struct['tracklet_mat']['track_set']), feature_size, max_length, 2))
+    else:
+        track_struct['tracklet_mat']['save_fea_mat'] = []
+    '''
+    return
+
+def comb_cost(tracklet_set, feature_size, max_length, img_size, sess,
+              batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1,
+              batch_mask_2, batch_Y, keep_prob, y_conv):
+    #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy()
+    #comb_track_cost = np.array(tracklet_mat['comb_track_cost'].copy())
+    #save_fea_mat = tracklet_mat['save_fea_mat'].copy()
+
+    global track_struct
+    global all_fea_mat
+    global all_fea_label
+    #import pdb; pdb.set_trace()
+    tracklet_mat = track_struct['tracklet_mat']
+
+    '''
+    temp_sum = np.sum(all_fea_mat[:,4,:,1], axis=1)
+    if len(np.where(temp_sum!=0)[0])==0:
+        fea_id = 0
+    else:
+        fea_id = int(np.max(np.where(temp_sum!=0)[0]))+1
+    '''
+
+    #print(fea_id)
+    #import pdb; pdb.set_trace()
+    # cnn
classifier + N_tracklet = len(tracklet_set) + track_interval = tracklet_mat['track_interval'] + sort_idx = np.argsort(track_interval[np.array(tracklet_set),1]) + cost = 0 + if len(sort_idx)<=1: + return cost + + + remove_ids = [] + #comb_fea_mat = np.zeros((len(sort_idx)-1,feature_size,max_length,2)) + #comb_fea_label = np.zeros((len(sort_idx)-1,4)) + + comb_fea_mat = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),feature_size,max_length,3)) + comb_fea_label = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),4)) + + temp_cost_list = [] + X1 = [] + X2 = [] + #print(len(comb_track_cost)) + cnt = -1 + for n in range(0, len(sort_idx)-1): + for kk in range(n+1,len(sort_idx)): + cnt = int(cnt+1) + track_id1 = tracklet_set[sort_idx[n]] + track_id2 = tracklet_set[sort_idx[kk]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + remove_ids.append(cnt) + continue + + if tracklet_mat['comb_track_cost_mask'][track_id1,track_id2]==1: + cost = cost+tracklet_mat['comb_track_cost'][track_id1,track_id2] + remove_ids.append(cnt) + continue + + comb_fea_label[cnt,0] = track_id1 + comb_fea_label[cnt,1] = track_id2 + + #if track_id1==32 and track_id2==46: + # import pdb; pdb.set_trace() + ''' + start_time = time.time() + if len(comb_track_cost)>0: + search_idx = np.where(np.logical_and(comb_track_cost[:,0]==track_id1, comb_track_cost[:,1]==track_id2)) + if len(search_idx[0])>0: + remove_ids.append(n) + #import pdb; pdb.set_trace() + cost = cost+comb_track_cost[search_idx[0][0],2] + elapsed_time = time.time() - start_time + print(elapsed_time) + continue + ''' + temp_cost_list.append([track_id1,track_id2]) + + + # t starts from 0 + #import pdb; pdb.set_trace() + t1_min = int(track_interval[track_id1,0]) + t1_max = int(track_interval[track_id1,1]) + t2_min = int(track_interval[track_id2,0]) + t2_max = int(track_interval[track_id2,1]) + t_min = int(min(t1_min,t2_min)) + t_max = int(max(t1_max,t2_max)) + + if t_max-t_min+1<=max_length: + comb_fea_mat[cnt,:,t1_min-t_min:t1_max-t_min+1,1] = 1 + comb_fea_mat[cnt,0,t1_min-t_min:t1_max-t_min+1,0] = 0.5*(tracklet_mat['xmin_mat'][track_id1,t1_min:t1_max+1] + +tracklet_mat['xmax_mat'][track_id1,t1_min:t1_max+1])/img_size[1] + comb_fea_mat[cnt,1,t1_min-t_min:t1_max-t_min+1,0] = 0.5*(tracklet_mat['ymin_mat'][track_id1,t1_min:t1_max+1] + +tracklet_mat['ymax_mat'][track_id1,t1_min:t1_max+1])/img_size[0] + comb_fea_mat[cnt,2,t1_min-t_min:t1_max-t_min+1,0] = (tracklet_mat['xmax_mat'][track_id1,t1_min:t1_max+1] + -tracklet_mat['xmin_mat'][track_id1,t1_min:t1_max+1]+1)/img_size[1] + comb_fea_mat[cnt,3,t1_min-t_min:t1_max-t_min+1,0] = (tracklet_mat['ymax_mat'][track_id1,t1_min:t1_max+1] + -tracklet_mat['ymin_mat'][track_id1,t1_min:t1_max+1]+1)/img_size[0] + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1+1)[0] + + if comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + X1.append(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + comb_fea_mat[cnt,:,t2_min-t_min:t2_max-t_min+1,2] = 1 + #print(t_min) + #print(t2_min) + #print(t2_max) + #import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,t2_min-t_min:t2_max-t_min+1,0] = 0.5*(tracklet_mat['xmin_mat'][track_id2,t2_min:t2_max+1] + +tracklet_mat['xmax_mat'][track_id2,t2_min:t2_max+1])/img_size[1] + comb_fea_mat[cnt,1,t2_min-t_min:t2_max-t_min+1,0] = 
0.5*(tracklet_mat['ymin_mat'][track_id2,t2_min:t2_max+1] + +tracklet_mat['ymax_mat'][track_id2,t2_min:t2_max+1])/img_size[0] + comb_fea_mat[cnt,2,t2_min-t_min:t2_max-t_min+1,0] = (tracklet_mat['xmax_mat'][track_id2,t2_min:t2_max+1] + -tracklet_mat['xmin_mat'][track_id2,t2_min:t2_max+1]+1)/img_size[1] + comb_fea_mat[cnt,3,t2_min-t_min:t2_max-t_min+1,0] = (tracklet_mat['ymax_mat'][track_id2,t2_min:t2_max+1] + -tracklet_mat['ymin_mat'][track_id2,t2_min:t2_max+1]+1)/img_size[0] + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2+1)[0] + if comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + + comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + X2.append(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + else: + t_len1 = t1_max-t1_min+1 + t_len2 = t2_max-t2_min+1 + t_len_min = min(t_len1,t_len2) + mid_t = int(0.5*(t1_max+t2_min)) + if mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1<=0.5*max_length: + t2_end = t2_max + t1_start = t2_end-max_length+1 + #t1_start = mid_t-int(0.5*max_length)+1 + #t2_end = t1_start+max_length-1 + elif mid_t-t1_min+1<=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = t1_min + t2_end = t1_start+max_length-1 + else: # mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = mid_t-int(0.5*max_length)+1 + t2_end = t1_start+max_length-1 + + comb_fea_mat[cnt,:,0:t1_max-t1_start+1,1] = comb_fea_mat[cnt,:,0:t1_max-t1_start+1,1]+1 + if comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmax_mat'][track_id1,t1_start:t1_max+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0] = 0.5*(tracklet_mat['xmin_mat'][track_id1,t1_start:t1_max+1] + +tracklet_mat['xmax_mat'][track_id1,t1_start:t1_max+1])/img_size[1] + comb_fea_mat[cnt,1,0:t1_max-t1_start+1,0] = 0.5*(tracklet_mat['ymin_mat'][track_id1,t1_start:t1_max+1] + +tracklet_mat['ymax_mat'][track_id1,t1_start:t1_max+1])/img_size[0] + comb_fea_mat[cnt,2,0:t1_max-t1_start+1,0] = (tracklet_mat['xmax_mat'][track_id1,t1_start:t1_max+1] + -tracklet_mat['xmin_mat'][track_id1,t1_start:t1_max+1]+1)/img_size[1] + comb_fea_mat[cnt,3,0:t1_max-t1_start+1,0] = (tracklet_mat['ymax_mat'][track_id1,t1_start:t1_max+1] + -tracklet_mat['ymin_mat'][track_id1,t1_start:t1_max+1]+1)/img_size[0] + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1+1)[0] + cand_idx = cand_idx[t1_start-t1_min:] + comb_fea_mat[cnt,4:,0:t1_max-t1_start+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + X1.append(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + comb_fea_mat[cnt,:,t2_min-t1_start:t2_end-t1_start+1,2] = 1 + if comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmin_mat'][track_id2,t2_min:t2_end+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0] = 0.5*(tracklet_mat['xmin_mat'][track_id2,t2_min:t2_end+1] + +tracklet_mat['xmax_mat'][track_id2,t2_min:t2_end+1])/img_size[1] + comb_fea_mat[cnt,1,t2_min-t1_start:t2_end-t1_start+1,0] = 0.5*(tracklet_mat['ymin_mat'][track_id2,t2_min:t2_end+1] + +tracklet_mat['ymax_mat'][track_id2,t2_min:t2_end+1])/img_size[0] + comb_fea_mat[cnt,2,t2_min-t1_start:t2_end-t1_start+1,0] = (tracklet_mat['xmax_mat'][track_id2,t2_min:t2_end+1] + -tracklet_mat['xmin_mat'][track_id2,t2_min:t2_end+1]+1)/img_size[1] + 
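+                # positions and sizes are normalized by image width/height, so the
+                # motion features fed to the network are resolution independent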
comb_fea_mat[cnt,3,t2_min-t1_start:t2_end-t1_start+1,0] = (tracklet_mat['ymax_mat'][track_id2,t2_min:t2_end+1] + -tracklet_mat['ymin_mat'][track_id2,t2_min:t2_end+1]+1)/img_size[0] + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2+1)[0] + #import pdb; pdb.set_trace() + cand_idx = cand_idx[0:t2_end-t2_min+1] + comb_fea_mat[cnt,4:,t2_min-t1_start:t2_end-t1_start+1,0] \ + = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + X2.append(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + #if track_id1==34 and track_id2==39: + # import pdb; pdb.set_trace() + + # remove overlap detections + t_overlap = np.where(comb_fea_mat[cnt,0,:,1]>1) + if len(t_overlap)>0: + t_overlap = t_overlap[0] + comb_fea_mat[cnt,:,t_overlap,:] = 0 + + + if len(track_set)>0: + search_idx = np.where(np.logical_and(track_set[:,0]==track_id1, track_set[:,1]==track_id2)) + if len(search_idx[0])>0: + #save_fea_mat[search_idx[0][0],:,:,:] = comb_fea_mat[n,:,:,:] + if track_set[search_idx[0][0],2]==1: + comb_fea_label[cnt,2] = 1 + else: + comb_fea_label[cnt,3] = 1 + + + if len(remove_ids)>0: + comb_fea_mat = np.delete(comb_fea_mat, np.array(remove_ids), axis=0) + comb_fea_label = np.delete(comb_fea_label, np.array(remove_ids), axis=0) + + if len(comb_fea_mat)>0: + + if track_struct['track_params']['use_net']==0: + for n in range(len(X1)): + pair_cost = spatial.distance.cdist(X1[n], X2[n], 'euclidean') + min_cost = np.min(pair_cost) + cost = cost+min_cost-7 + tracklet_mat['comb_track_cost_mask'][temp_cost_list[n][0],temp_cost_list[n][1]] = 1 + tracklet_mat['comb_track_cost'][temp_cost_list[n][0],temp_cost_list[n][1]] = min_cost-7 + #cost = cost+track_struct['track_params']['cost_bias']*len(sort_idx) + return cost + + #************************************* + comb_fea_mat = track_lib.interp_batch(comb_fea_mat) + #************************************* + + max_batch_size = 16 + num_batch = int(np.ceil(comb_fea_mat.shape[0]/max_batch_size)) + pred_y = np.zeros((comb_fea_mat.shape[0],2)) + for n in range(num_batch): + if n!=num_batch-1: + batch_size = max_batch_size + else: + batch_size = int(comb_fea_mat.shape[0]-(num_batch-1)*max_batch_size) + + batch_size = comb_fea_mat.shape[0] + x = np.zeros((batch_size,1,max_length,1)) + y = np.zeros((batch_size,1,max_length,1)) + w = np.zeros((batch_size,1,max_length,1)) + h = np.zeros((batch_size,1,max_length,1)) + ap = np.zeros((batch_size,feature_size-4,max_length,1)) + mask_1 = np.zeros((batch_size,1,max_length,2)) + mask_2 = np.zeros((batch_size,feature_size-4,max_length,2)) + x[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,0] + y[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,1,:,0] + w[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,2,:,0] + h[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,3,:,0] + ap[:,:,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,0] + mask_1[:,0,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,1:] + mask_2[:,:,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,1:] + pred_y[n*max_batch_size:n*max_batch_size+batch_size,:] = sess.run(y_conv, feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: np.zeros((batch_size,2)), + keep_prob: 1.0}) + + for n in range(len(pred_y)): + if np.sum(comb_fea_label[n,2:4])>0: + continue + if pred_y[n,0]>pred_y[n,1]: + comb_fea_label[n,2] = 1 + else: + 
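+                # the network scored this pair as two different targets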
comb_fea_label[n,3] = 1 + + if comb_fea_mat.shape[0]!=len(pred_y): + import pdb; pdb.set_trace() + + #print(comb_fea_label) + ''' + all_fea_mat[fea_id:fea_id+len(pred_y),:,:,:] = comb_fea_mat + all_fea_label[fea_id:fea_id+len(pred_y),:] = comb_fea_label + ''' + #if len(np.where(np.logical_and(comb_fea_label[:,0]==428,comb_fea_label[:,1]==435))[0])>0: + # import pdb; pdb.set_trace() + #print(comb_fea_label) + + cost = cost+np.sum(pred_y[:,1]-pred_y[:,0]) + #import pdb; pdb.set_trace() + + if pred_y.shape[0]!=len(temp_cost_list): + import pdb; pdb.set_trace() + for n in range(pred_y.shape[0]): + #import pdb; pdb.set_trace() + ''' + if tracklet_mat['comb_track_cost_mask'].shape[0]<=temp_cost_list[n][0] \ + or tracklet_mat['comb_track_cost_mask'].shape[1]<=temp_cost_list[n][1]: + import pdb; pdb.set_trace() + ''' + tracklet_mat['comb_track_cost_mask'][temp_cost_list[n][0],temp_cost_list[n][1]] = 1 + tracklet_mat['comb_track_cost'][temp_cost_list[n][0],temp_cost_list[n][1]] = pred_y[n,1]-pred_y[n,0] + + #comb_track_cost_list = comb_track_cost_list+temp_cost_list + #print(np.sum(tracklet_mat['comb_track_cost_mask'])) + + #cost = cost+track_struct['track_params']['cost_bias']*len(sort_idx) + return cost + +def get_split_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + + + tracklet_mat = track_struct['tracklet_mat'] + new_cluster_cost = np.zeros((2,1)) + if len(tracklet_mat['track_cluster'][track_id])<2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_interval = tracklet_mat['track_interval'].copy() + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append([track_id]) + remain_tracks = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + remain_tracks.remove(track_id) + new_cluster_set.append(remain_tracks) + + # get cost + if len(remain_tracks)>1: + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[1]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[1][sort_idx[n]] + track_id2 = new_cluster_set[1][sort_idx[n+1]] + #if track_id1==42: + # import pdb; pdb.set_trace() + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + #********************************* + new_cluster_cost[1,0] = comb_cost(remain_tracks, feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + # cross cost + comb_cluster = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + cross_cost = np.zeros((2,1)) + ''' + for n in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[n]] + track_id2 = comb_cluster[sort_idx[n+1]] + if (track_id1 in new_cluster_set[0] and track_id2 in 
new_cluster_set[1]) \ + or (track_id1 in new_cluster_set[1] and track_id2 in new_cluster_set[0]): + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + cross_cost[1,0] = cross_cost[1,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + + # bias cost + #************************************* + new_cluster_cost[0,0] = track_struct['track_params']['cost_bias'] + new_cluster_cost[1,0] = new_cluster_cost[1,0]+track_struct['track_params']['cost_bias'] + + + #************************************* + cost = np.sum(new_cluster_cost)-cross_cost[1,0] + prev_cost = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]]-cross_cost[0,0] + diff_cost = cost-prev_cost + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_assign_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + #import pdb; pdb.set_trace() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + new_cluster_cost = np.zeros((2,1)) + new_cluster_set = [] + new_cluster_set.append(cluster1.copy()) + new_cluster_set[0].remove(track_id) + track_interval = tracklet_mat['track_interval'].copy() + # get cost + if len(new_cluster_set[0])>1: + + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[0]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[0][sort_idx[n]] + track_id2 = new_cluster_set[0][sort_idx[n+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, + y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + track_class = track_struct['tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + temp_new_cluster_cost = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + for mm in range(N_cluster): + n = track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n] + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster2.copy() + temp_cluster_set.append(track_id) + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + #if cluster2[m] in remove_set: + # remove_flag = 
1 + # break + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + if neighbor_flag==0 or conflict_flag==1:# or remove_flag==1: + continue + + # get cost + temp_set = cluster2.copy() + temp_set.append(track_id) + temp_new_cluster_cost[n,0] = comb_cost(temp_set, feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + #import pdb; pdb.set_trace() + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + ''' + # cross cost + comb_cluster = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[m]] + track_id2 = comb_cluster[sort_idx[m+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + if (track_id1 in cluster1 and track_id2 in cluster2) or (track_id1 in cluster2 and track_id2 in cluster1): + cross_cost_vec[n,0] = cross_cost_vec[n,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + if (track_id1 in new_cluster_set[0] and track_id2 in temp_set) or \ + (track_id1 in temp_set and track_id2 in new_cluster_set[0]): + cross_cost_vec[n,1] = cross_cost_vec[n,1]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + + + new_cluster_cost[0,0] = new_cluster_cost[0,0]+track_struct['track_params']['cost_bias'] + cost_vec = temp_new_cluster_cost[:,0]+track_struct['track_params']['cost_bias']+new_cluster_cost[0,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost[1,0] = temp_new_cluster_cost[min_idx,0]+track_struct['track_params']['cost_bias'] + change_cluster_idx = [tracklet_mat['track_class'][track_id],min_idx] + temp_set = tracklet_mat['track_cluster'][min_idx].copy() + temp_set.append(track_id) + new_cluster_set.append(temp_set) + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_merge_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)==1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return 
diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + new_cluster_cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n].copy() + if len(cluster2)<=1: + continue + + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + + if neighbor_flag==0 or conflict_flag==1:# or remove_flag==1: + continue + + + # get cost + new_cluster_cost_vec[n,0] = comb_cost(cluster1+cluster2, feature_size, + max_length, img_size, sess, batch_X_x, batch_X_y, + batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + ''' + # cross cost + comb_cluster = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[m]] + track_id2 = comb_cluster[sort_idx[m+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + if (track_id1 in cluster1 and track_id2 in cluster2) or (track_id1 in cluster2 and track_id2 in cluster1): + cross_cost_vec[n,0] = cross_cost_vec[n,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = new_cluster_cost_vec[:,0]-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = new_cluster_cost_vec[min_idx,0] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx]+track_struct['track_params']['cost_bias'] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = cost+track_struct['track_params']['cost_bias'] + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + temp_set = cluster1.copy() + temp_set = temp_set+tracklet_mat['track_cluster'][min_idx] + new_cluster_set.append(temp_set) + new_cluster_set.append([]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_switch_cost(track_id, sess, img_size, batch_X_x, 
batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + S1 = [] + S2 = [] + for k in range(len(cluster1)): + temp_id = cluster1[k] + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S1.append(temp_id) + else: + S2.append(temp_id) + if len(S1)==0 or len(S2)==0: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + new_cluster_cost_vec1 = float("inf")*np.ones((NN_cluster,1)) + new_cluster_cost_vec2 = float("inf")*np.ones((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + track_id_set = [] + for n in range(NN_cluster): + track_id_set.append([]) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # switch availability check + S3 = [] + S4 = [] + #remove_flag = 0 + cluster2 = tracklet_mat['track_cluster'][n].copy() + for k in range(len(cluster2)): + temp_id = cluster2[k] + #if temp_id in remove_set: + # remove_flag = 1 + # break + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S3.append(temp_id) + else: + #******************************************** + if tracklet_mat['track_interval'][temp_id,1] >=tracklet_mat['track_interval'][track_id,1] \ + and tracklet_mat['track_interval'][temp_id,0] <=tracklet_mat['track_interval'][track_id,1]: + if tracklet_mat['track_interval'][temp_id,1] -tracklet_mat['track_interval'][track_id,1] \ + >tracklet_mat['track_interval'][track_id,1]-tracklet_mat['track_interval'][temp_id,0]: + S4.append(temp_id) + else: + S3.append(temp_id) + else: + S4.append(temp_id) + + #if remove_flag==1: + # continue + + neighbor_flag1 = 1 + conflict_flag1 = 0 + if len(S3)==0: + neighbor_flag1 = 1 + conflict_flag1 = 0 + else: + temp_cluster_set = S3+S2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag1 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag1 = 1 + break + + + neighbor_flag2 = 1 + conflict_flag2 = 0 + if len(S4)==0: + neighbor_flag2 = 1 + conflict_flag2 = 0 + else: + temp_cluster_set = S4+S1 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + 
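+                        # a single non-neighboring consecutive pair invalidates this swap candidate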
neighbor_flag2 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag2 = 1 + break + + if neighbor_flag1==0 or conflict_flag1==1 or neighbor_flag2==0 or conflict_flag2==1: + continue + + + + # get cost + S_1 = S1+S4 + S_2 = S2+S3 + + #if (428 in S_1 and 435 in S_1) or (428 in S_2 and 435 in S_2): + # import pdb; pdb.set_trace() + + new_cluster_cost_vec1[n,0] = comb_cost(S_1, feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + new_cluster_cost_vec2[n,0] = comb_cost(S_2, feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + cost_vec[n,0] = new_cluster_cost_vec1[n,0]+new_cluster_cost_vec2[n,0] + + track_id_set[n].append(S_1.copy()) + track_id_set[n].append(S_2.copy()) + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + ''' + # cross cost + comb_cluster = S_1+S_2 + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[m]] + track_id2 = comb_cluster[sort_idx[m+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + if (track_id1 in cluster1 and track_id2 in cluster2) or (track_id1 in cluster2 and track_id2 in cluster1): + cross_cost_vec[n,0] = cross_cost_vec[n,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + if (track_id1 in S_1 and track_id2 in S_2) or (track_id1 in S_2 and track_id2 in S_1): + cross_cost_vec[n,1] = cross_cost_vec[n,1]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + + cost_vec = cost_vec[:,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = new_cluster_cost_vec1[min_idx,0]+track_struct['track_params']['cost_bias'] + new_cluster_cost[1,0] = new_cluster_cost_vec2[min_idx,0]+track_struct['track_params']['cost_bias'] + diff_cost = diff_cost+2*track_struct['track_params']['cost_bias'] + + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + new_cluster_set.append(track_id_set[min_idx][0]) + new_cluster_set.append(track_id_set[min_idx][1]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_break_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = 
tracklet_mat['save_fea_mat'].copy() + ''' + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + ''' + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + + new_cluster_cost = np.zeros((2,1)) + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)<=2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + # get cost + after_ids = [] + for n in range(len(cluster1)): + if tracklet_mat['track_interval'][cluster1[n],1]>tracklet_mat['track_interval'][track_id,1]: + after_ids.append(cluster1[n]) + + if len(after_ids)==0: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + before_ids = list(set(cluster1)-set(after_ids)) + if len(before_ids)<=1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append(before_ids) + remain_tracks = after_ids + new_cluster_set.append(remain_tracks) + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, + y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + new_cluster_cost[1,0] = comb_cost(new_cluster_set[1], feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, + y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + new_cluster_cost = new_cluster_cost+track_struct['track_params']['cost_bias'] + cost = np.sum(new_cluster_cost) + diff_cost = cost-tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def update_tracklet_mat(tracklet_mat): + final_tracklet_mat = tracklet_mat.copy() + track_interval = tracklet_mat['track_interval'] + num_cluster = len(tracklet_mat['track_cluster']) + final_tracklet_mat['track_id_mat'] = -1*np.ones((num_cluster,tracklet_mat['xmin_mat'].shape[1])) + + final_xmin_mat = -1*np.ones((num_cluster, final_tracklet_mat['xmin_mat'].shape[1])) + final_ymin_mat = -1*np.ones((num_cluster, final_tracklet_mat['ymin_mat'].shape[1])) + final_xmax_mat = -1*np.ones((num_cluster, final_tracklet_mat['xmax_mat'].shape[1])) + final_ymax_mat = -1*np.ones((num_cluster, final_tracklet_mat['ymax_mat'].shape[1])) + final_det_score_mat = -1*np.ones((num_cluster, final_tracklet_mat['det_score_mat'].shape[1])) + final_tracklet_mat['xmin_mat'] = final_xmin_mat.copy() + final_tracklet_mat['ymin_mat'] = final_ymin_mat.copy() + final_tracklet_mat['xmax_mat'] = final_xmax_mat.copy() + final_tracklet_mat['ymax_mat'] = final_ymax_mat.copy() + final_tracklet_mat['det_score_mat'] = final_det_score_mat.copy() + + for n in 
range(num_cluster): + for k in range(len(final_tracklet_mat['track_cluster'][n])): + temp_id = final_tracklet_mat['track_cluster'][n][k] + #import pdb; pdb.set_trace() + final_tracklet_mat['track_id_mat'][n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] = temp_id + final_xmin_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['xmin_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_ymin_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['ymin_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_xmax_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['xmax_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_ymax_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['ymax_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_det_score_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['det_score_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + + det_xmin_mat = final_xmin_mat.copy() + det_ymin_mat = final_ymin_mat.copy() + det_xmax_mat = final_xmax_mat.copy() + det_ymax_mat = final_ymax_mat.copy() + det_det_score_mat = final_det_score_mat.copy() + + window_size = 2 + for n in range(num_cluster): + det_idx = np.where(final_xmin_mat[n,:]!=-1)[0] + t_min = np.min(det_idx) + t_max = np.max(det_idx) + miss_idx = np.where(final_xmin_mat[n,t_min:t_max+1]==-1)[0] + if len(miss_idx)==0: + continue + miss_idx = miss_idx+t_min + final_xmin_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_xmin_mat[n,det_idx]) + + final_ymin_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_ymin_mat[n,det_idx]) + + final_xmax_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_xmax_mat[n,det_idx]) + + final_ymax_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_ymax_mat[n,det_idx]) + + final_det_score_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_det_score_mat[n,det_idx]) + + + ''' + # merge two trajectories if they overlap + bbox_overlap_thresh = 0.7 + time_overlap_tresh = 5 + det_overlap_thresh = 0.1 + bbox_overlap_mat = np.zeros((num_cluster,num_cluster)) + for n in range(num_cluster-1): + for m in range(n+1,num_cluster): + cand_t = np.where(np.logical_and(final_xmin_mat[n,:]!=-1, final_xmin_mat[m,:]!=-1))[0] + if len(cand_t)det_overlap_thresh: + continue + + final_tracklet_mat['track_id_mat'][n,int(t2_min):int(t2_max)+1] = \ + final_tracklet_mat['track_id_mat'][m,int(t2_min):int(t2_max)+1] + final_xmin_mat[n,int(t2_min):int(t2_max)+1] = final_xmin_mat[m,int(t2_min):int(t2_max)+1] + final_ymin_mat[n,int(t2_min):int(t2_max)+1] = final_ymin_mat[m,int(t2_min):int(t2_max)+1] + final_xmax_mat[n,int(t2_min):int(t2_max)+1] = final_xmax_mat[m,int(t2_min):int(t2_max)+1] + final_ymax_mat[n,int(t2_min):int(t2_max)+1] = final_ymax_mat[m,int(t2_min):int(t2_max)+1] + final_det_score_mat[n,int(t2_min):int(t2_max)+1] = final_det_score_mat[m,int(t2_min):int(t2_max)+1] + + final_tracklet_mat['track_id_mat'][m,int(t2_min):int(t2_max)+1] = -1 + final_xmin_mat[m,:] = -1 + final_ymin_mat[m,:] = -1 + final_xmax_mat[m,:] = -1 + final_ymax_mat[m,:] = -1 + final_det_score_mat[m,:] = -1 + + ''' + final_tracklet_mat['xmin_mat'] = final_xmin_mat + final_tracklet_mat['ymin_mat'] = final_ymin_mat + final_tracklet_mat['xmax_mat'] = final_xmax_mat + 
final_tracklet_mat['ymax_mat'] = final_ymax_mat + final_tracklet_mat['det_score_mat'] = final_det_score_mat + + + + # moving average + for n in range(num_cluster): + cand_t = np.where(final_xmin_mat[n,:]!=-1)[0] + if len(cand_t)==0: + continue + t1 = int(np.min(cand_t)) + t2 = int(np.max(cand_t)) + for k in range(t1,t2+1): + t_start = max(k-window_size,t1) + t_end = min(k+window_size,t2) + final_tracklet_mat['xmin_mat'][n,k] = np.sum(final_xmin_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['ymin_mat'][n,k] = np.sum(final_ymin_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['xmax_mat'][n,k] = np.sum(final_xmax_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['ymax_mat'][n,k] = np.sum(final_ymax_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['det_score_mat'][n,k] = np.sum(final_det_score_mat[n,t_start:t_end+1])/(t_end-t_start+1) + + + return final_tracklet_mat + +def post_processing(): + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + track_params = track_struct['track_params'] + new_tracklet_mat = tracklet_mat.copy() + #import pdb; pdb.set_trace() + + # update track cluster + N_cluster = len(tracklet_mat["track_cluster"]) + remove_idx = [] + for n in range(N_cluster): + if len(tracklet_mat["track_cluster"][n])==0: + remove_idx.append(n) + continue + if tracklet_mat["track_cluster"][n][0] in remove_set: + remove_idx.append(n) + continue + + temp_track_intervals = tracklet_mat["track_interval"][np.array(tracklet_mat["track_cluster"][n]),:] + start_fr = np.min(temp_track_intervals[:,0]) + end_fr = np.max(temp_track_intervals[:,1]) + num_frs = end_fr-start_fr+1 + if num_frs600: + # import pdb; pdb.set_trace() + + # remove_set + if t_cluster_idx[0]==-1: + continue + + #if track_struct['tracklet_mat']['track_class'][track_id]<0: + # continue + + #if track_id in remove_set: + # continue + + diff_cost = np.zeros((5,1)) + new_C = [] # new cost + new_set = [] + change_idx = [] + + #cluster_cost = track_struct['tracklet_mat']['cluster_cost'] + #track_class = track_struct['tracklet_mat']['track_class'] + + # get split cost + #import pdb; pdb.set_trace() + diff_cost[0,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_split_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get assign cost + #import pdb; pdb.set_trace() + diff_cost[1,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_assign_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get merge cost + diff_cost[2,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_merge_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + 
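+            # record the merge proposal alongside the other candidate moves;
+            # the arg-min over split/assign/merge/switch/break is applied below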
new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get switch cost + diff_cost[3,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_switch_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get break cost + diff_cost[4,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_break_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # update cluster + min_idx = np.argmin(diff_cost[:,0]) + min_cost = diff_cost[min_idx,0] + if min_cost>=0: + continue + + change_flag = 1 + #if track_id==251: + # import pdb; pdb.set_trace() + + #**************** + #import pdb; pdb.set_trace() + print(min_idx) + print(new_set) + new_t_idx = [] + if len(new_set[min_idx][0])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][0]),1)) + t_max_array = np.zeros((len(new_set[min_idx][0]),1)) + for m in range(len(new_set[min_idx][0])): + t_min_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][0][m],0] + t_max_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][0][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if len(new_set[min_idx][1])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][1]),1)) + t_max_array = np.zeros((len(new_set[min_idx][1]),1)) + for m in range(len(new_set[min_idx][1])): + t_min_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][1][m],0] + t_max_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][1][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if change_idx[min_idx][0]>=len(track_struct['tracklet_mat']['track_cluster']): + for m in range(len(track_struct['tracklet_mat']['track_cluster']),change_idx[min_idx][0]): + track_struct['tracklet_mat']['track_cluster'].append([]) + track_struct['tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['tracklet_mat']['track_cluster'].append(new_set[min_idx][0]) + 
track_struct['tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[0]) + else: + track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][0]] = new_set[min_idx][0] + track_struct['tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][0]] = new_t_idx[0] + + if change_idx[min_idx][1]>=len(track_struct['tracklet_mat']['track_cluster']): + for m in range(len(track_struct['tracklet_mat']['track_cluster']),change_idx[min_idx][1]): + track_struct['tracklet_mat']['track_cluster'].append([]) + track_struct['tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['tracklet_mat']['track_cluster'].append(new_set[min_idx][1]) + track_struct['tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[1]) + else: + track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][1]] = new_set[min_idx][1] + track_struct['tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][1]] = new_t_idx[1] + + for m in range(track_struct['track_params']['num_time_cluster']): + #import pdb; pdb.set_trace() + if change_idx[min_idx][0] in track_struct['tracklet_mat']['time_cluster'][m]: + track_struct['tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][0]) + if change_idx[min_idx][1] in track_struct['tracklet_mat']['time_cluster'][m]: + track_struct['tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][1]) + + for m in range(track_struct['track_params']['num_time_cluster']): + if m in new_t_idx[0]: + track_struct['tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][0]) + if m in new_t_idx[1]: + track_struct['tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][1]) + + if change_idx[min_idx][0]>=len(track_struct['tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['tracklet_mat']['cluster_cost']),change_idx[min_idx][0]): + track_struct['tracklet_mat']['cluster_cost'].append(0) + track_struct['tracklet_mat']['cluster_cost'].append(new_C[min_idx][0]) + else: + track_struct['tracklet_mat']['cluster_cost'][change_idx[min_idx][0]] = new_C[min_idx][0] + + if change_idx[min_idx][1]>=len(track_struct['tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['tracklet_mat']['cluster_cost']),change_idx[min_idx][1]): + track_struct['tracklet_mat']['cluster_cost'].append([]) + track_struct['tracklet_mat']['cluster_cost'].append(new_C[min_idx][1]) + else: + track_struct['tracklet_mat']['cluster_cost'][change_idx[min_idx][1]] = new_C[min_idx][1] + + for k in range(len(track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][0]])): + track_struct['tracklet_mat']['track_class'][track_struct['tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][0]][k]] = change_idx[min_idx][0] + + for k in range(len(track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][1]])): + track_struct['tracklet_mat']['track_class'][track_struct['tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][1]][k]] = change_idx[min_idx][1] + #import pdb; pdb.set_trace() + return change_flag + +def crop_det(tracklet_mat, crop_size, img_folder, crop_det_folder, flag): + + if not os.path.isdir(crop_det_folder): + os.makedirs(crop_det_folder) + + N_tracklet = tracklet_mat['xmin_mat'].shape[0] + T = tracklet_mat['xmin_mat'].shape[1] + img_list = os.listdir(img_folder) + cnt = 0 + for n in range(T): + track_ids = np.where(tracklet_mat['xmax_mat'][:,n]!=-1) + if len(track_ids)==0: + continue + track_ids = track_ids[0] + img_name = track_lib.file_name(n+1,file_len)+'.jpg' + if img_name in img_list: + img_path = img_folder+'/'+img_name + img = misc.imread(img_path) + 
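+            # keep the frame size; boxes are clipped to it before cropping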
img_size = img.shape + else: + continue + + for m in range(len(track_ids)): + if flag==0: + xmin = int(max(0,tracklet_mat['xmin_mat'][track_ids[m],n])) + xmax = int(min(img.shape[1]-1,tracklet_mat['xmax_mat'][track_ids[m],n])) + ymin = int(max(0,tracklet_mat['ymin_mat'][track_ids[m],n])) + ymax = int(min(img.shape[0]-1,tracklet_mat['ymax_mat'][track_ids[m],n])) + img_patch = img[ymin:ymax,xmin:xmax,:] + img_patch = misc.imresize(img_patch, size=[crop_size,crop_size]) + class_name = track_lib.file_name(track_ids[m]+1,4) + patch_name = class_name+'_'+track_lib.file_name(n+1,4)+'.png' + save_path = crop_det_folder+'/'+class_name + if not os.path.isdir(save_path): + os.makedirs(save_path) + save_path = save_path+'/'+patch_name + + #import pdb; pdb.set_trace() + misc.imsave(save_path, img_patch) + cnt = cnt+1 + return cnt, img_size + +def feature_encode(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, + batch_size_placeholder, control_placeholder, embeddings, labels, image_paths, + batch_size, distance_metric): + + # Run forward pass to calculate embeddings + #print('Runnning forward pass on LFW images') + + use_flipped_images = False + use_fixed_image_standardization = False + use_random_rotate = False + use_radnom_crop = False + # Enqueue one epoch of image paths and labels + nrof_embeddings = len(image_paths) # nrof_pairs * nrof_images_per_pair + nrof_flips = 2 if use_flipped_images else 1 + nrof_images = nrof_embeddings * nrof_flips + labels_array = np.expand_dims(np.arange(0,nrof_images),1) + image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1) + control_array = np.zeros_like(labels_array, np.int32) + + if use_fixed_image_standardization: + control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION + if use_flipped_images: + # Flip every second image + control_array += (labels_array % 2)*facenet.FLIP + if use_random_rotate: + control_array += facenet.RANDOM_ROTATE + if use_radnom_crop: + control_array += facenet.RANDOM_CROP + + sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, + labels_placeholder: labels_array, control_placeholder: control_array}) + + embedding_size = int(embeddings.get_shape()[1]) + assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' + nrof_batches = nrof_images // batch_size + emb_array = np.zeros((nrof_images, embedding_size)) + lab_array = np.zeros((nrof_images,)) + for i in range(nrof_batches): + feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size} + emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict) + lab_array[lab] = lab + emb_array[lab, :] = emb + if i % 10 == 9: + print('.', end='') + sys.stdout.flush() + #import pdb; pdb.set_trace() + #np.savetxt("emb_array.csv", emb_array, delimiter=",") + return emb_array + +def feature_extract2(feature_size, num_patch, max_length, patch_folder, triplet_model): + f_image_size = 160 + distance_metric = 0 + + #****************** + triplet_graph = tf.Graph() + with triplet_graph.as_default(): + image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') + labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') + batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') + control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') + phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') + + nrof_preprocess_threads = 4 + image_size = 
(f_image_size, f_image_size) + eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000, + dtypes=[tf.string, tf.int32, tf.int32], + shapes=[(1,), (1,), (1,)], + shared_name=None, name=None) + eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder, + labels_placeholder, control_placeholder], + name='eval_enqueue_op') + image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size, + nrof_preprocess_threads, batch_size_placeholder) + + triplet_sess = tf.Session(graph=triplet_graph) + with triplet_sess.as_default(): + with triplet_graph.as_default(): + # Load the model + input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder} + facenet.load_model(triplet_model, input_map=input_map) + + with triplet_sess.as_default(): + with triplet_graph.as_default(): + # Get output tensor + embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=triplet_sess) + + fea_mat = np.zeros((num_patch,feature_size-4+2)) + tracklet_list = os.listdir(patch_folder) + N_tracklet = len(tracklet_list) + cnt = 0 + for n in range(N_tracklet): + tracklet_folder = patch_folder+'/'+tracklet_list[n] + patch_list = os.listdir(tracklet_folder) + + # get patch list, track_id and fr_id, starts from 1 + prev_cnt = cnt + for m in range(len(patch_list)): + # track_id + fea_mat[cnt,0] = n+1 + # fr_id + fea_mat[cnt,1] = int(patch_list[m][-8:-4]) + cnt = cnt+1 + patch_list[m] = tracklet_folder+'/'+patch_list[m] + + + #print(n) + lfw_batch_size = len(patch_list) + emb_array = feature_encode(triplet_sess, eval_enqueue_op, image_paths_placeholder, labels_placeholder, + phase_train_placeholder,batch_size_placeholder, control_placeholder, + embeddings, label_batch, patch_list, lfw_batch_size, distance_metric) + fea_mat[prev_cnt:prev_cnt+lfw_batch_size,2:] = np.copy(emb_array) + return fea_mat + +def hist_feature_extract(feature_size, num_patch, max_length, patch_folder): + fea_mat = np.zeros((num_patch,feature_size-4+2)) + tracklet_list = os.listdir(patch_folder) + N_tracklet = len(tracklet_list) + cnt = 0 + for n in range(N_tracklet): + tracklet_folder = patch_folder+'/'+tracklet_list[n] + patch_list = os.listdir(tracklet_folder) + + # get patch list, track_id and fr_id, starts from 1 + prev_cnt = cnt + for m in range(len(patch_list)): + # track_id + fea_mat[cnt,0] = n+1 + # fr_id + fea_mat[cnt,1] = int(patch_list[m][-8:-4]) + + patch_list[m] = tracklet_folder+'/'+patch_list[m] + patch_img = imread(patch_list[m]) + fea_mat[cnt,2:] = track_lib.extract_hist(patch_img) + #import pdb; pdb.set_trace() + cnt = cnt+1 + return fea_mat + +def feature_extract(feature_size, num_patch, max_length, patch_folder, triplet_model): + f_image_size = 160 + distance_metric = 0 + with tf.Graph().as_default(): + + with tf.Session() as sess: + + image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') + labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') + batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') + control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') + phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') + + nrof_preprocess_threads = 4 + image_size = (f_image_size, f_image_size) + eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000, + dtypes=[tf.string, tf.int32, tf.int32], + shapes=[(1,), (1,), (1,)], + shared_name=None, name=None) + 
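+            # every patch path is enqueued once; the preprocess threads decode and
+            # resize the images, and feature_encode above reads the embeddings back
+            # in batches, using the label index to restore the original ordering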
eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder, + labels_placeholder, control_placeholder], + name='eval_enqueue_op') + image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size, + nrof_preprocess_threads, batch_size_placeholder) + + # Load the model + input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder} + facenet.load_model(triplet_model, input_map=input_map) + + # Get output tensor + embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=sess) + + fea_mat = np.zeros((num_patch,feature_size-4+2)) + tracklet_list = os.listdir(patch_folder) + N_tracklet = len(tracklet_list) + cnt = 0 + for n in range(N_tracklet): + tracklet_folder = patch_folder+'/'+tracklet_list[n] + patch_list = os.listdir(tracklet_folder) + + # get patch list, track_id and fr_id, starts from 1 + prev_cnt = cnt + for m in range(len(patch_list)): + # track_id + fea_mat[cnt,0] = n+1 + # fr_id + fea_mat[cnt,1] = int(patch_list[m][-8:-4]) + cnt = cnt+1 + patch_list[m] = tracklet_folder+'/'+patch_list[m] + + + #print(n) + lfw_batch_size = len(patch_list) + emb_array = feature_encode(sess, eval_enqueue_op, image_paths_placeholder, labels_placeholder, + phase_train_placeholder,batch_size_placeholder, control_placeholder, + embeddings, label_batch, patch_list, lfw_batch_size, distance_metric) + fea_mat[prev_cnt:prev_cnt+lfw_batch_size,2:] = np.copy(emb_array) + return fea_mat + +def color_table(num): + digit = '0123456789ABCDEF' + table = [] + for n in range(num): + select_idx = np.random.randint(16, size=6) + for k in range(6): + if k==0: + temp_color = digit[select_idx[k]] + else: + temp_color = temp_color+digit[select_idx[k]] + table.append(temp_color) + return table + +def draw_result(img_folder, save_folder): + #track_struct = pickle.load(open(track_struct_path,'rb')) + + global track_struct + tracklet_mat = track_struct['final_tracklet_mat'] + img_list = os.listdir(img_folder) + table = color_table(len(tracklet_mat['track_cluster'])) + #import pdb; pdb.set_trace() + for n in range(track_struct['final_tracklet_mat']['xmin_mat'].shape[1]): + img_name = track_lib.file_name(n+1,file_len)+'.jpg' + if img_name not in img_list: + continue + img_path = img_folder+'/'+img_name + img = misc.imread(img_path) + + # Create figure and axes + fig,ax = plt.subplots(1) + + # Display the image + ax.imshow(img) + + # Create Rectangle patches + + + for k in range(tracklet_mat['xmin_mat'].shape[0]): + # + track_id = int(tracklet_mat['track_id_mat'][k,n]) + + ''' + if track_id==-1: + track_class = -1 + else: + track_class = int(tracklet_mat['track_class'][track_id,0]) + ''' + + if tracklet_mat['xmin_mat'][k,n]!=-1: + xmin = tracklet_mat['xmin_mat'][k,n] + ymin = tracklet_mat['ymin_mat'][k,n] + xmax = tracklet_mat['xmax_mat'][k,n] + ymax = tracklet_mat['ymax_mat'][k,n] + w = xmax-xmin + h = ymax-ymin + rect = patches.Rectangle((xmin,ymin),w,h,linewidth=1,edgecolor='#'+table[k], facecolor='none') + img_text = plt.text(xmin,ymin,str(k)+'_'+str(track_id),fontsize=6,color='#'+table[k]) + # Add the patch to the Axes + ax.add_patch(rect) + + if not os.path.isdir(save_folder): + os.makedirs(save_folder) + save_path = save_folder+'/'+img_name + plt.savefig(save_path,bbox_inches='tight',dpi=400) + + plt.clf() + plt.close('all') + #plt.show() + #import pdb; pdb.set_trace() + return + +def convert_frames_to_video(pathIn,pathOut,fps): + frame_array = [] 
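+    # Note (editor): os.listdir gives no ordering guarantee, so frames should
+    # be sorted before writing. The zero-padded names written by draw_result
+    # make a plain lexicographic sort temporally correct, e.g. files.sort();
+    # the commented-out key-based sort below is only needed for un-padded names.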
+ files = [f for f in os.listdir(pathIn) if os.path.isfile(os.path.join(pathIn, f))] + + #for sorting the file names properly + #files.sort(key = lambda x: int(x[5:-4])) + + for i in range(len(files)): + filename=pathIn + files[i] + #reading each files + img = cv2.imread(filename) + height, width, layers = img.shape + + if i==0: + size = (width,height) + img = cv2.resize(img,size) + #print(filename) + #inserting the frames into an image array + frame_array.append(img) + + out = cv2.VideoWriter(pathOut,cv2.VideoWriter_fourcc(*'DIVX'), fps, size) + + for i in range(len(frame_array)): + # writing to a image array + out.write(frame_array[i]) + out.release() + +def wrt_txt(tracklet_mat): + num_det = np.sum(tracklet_mat['xmin_mat']!=-1) + f = np.zeros((num_det, 9), dtype=int) + cnt = 0 + for n in range(tracklet_mat['xmin_mat'].shape[1]): + for m in range(tracklet_mat['xmin_mat'].shape[0]): + if tracklet_mat['xmin_mat'][m,n]==-1: + continue + f[cnt,0] = n+1 + f[cnt,1] = m+1 + f[cnt,2] = tracklet_mat['xmin_mat'][m,n] + f[cnt,3] = tracklet_mat['ymin_mat'][m,n] + f[cnt,4] = tracklet_mat['xmax_mat'][m,n]-tracklet_mat['xmin_mat'][m,n]+1 + f[cnt,5] = tracklet_mat['ymax_mat'][m,n]-tracklet_mat['ymin_mat'][m,n]+1 + f[cnt,6] = -1 + f[cnt,7] = -1 + f[cnt,8] = -1 + cnt = cnt+1 + np.savetxt(txt_result_path, f, delimiter=',',fmt='%d') + +def time_cluster_check(): + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + N_cluster = len(tracklet_mat['track_cluster']) + err_flag = 0 + #import pdb; pdb.set_trace() + for n in range(N_cluster): + if len(tracklet_mat['track_cluster'][n])==0: + if tracklet_mat['track_cluster_t_idx'][n][0]!=-1: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + elif tracklet_mat['track_cluster'][n][0] in remove_set: + if tracklet_mat['track_cluster_t_idx'][n][0]!=-1: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + else: + t_min_array = np.zeros((len(tracklet_mat['track_cluster'][n]),1)) + t_max_array = np.zeros((len(tracklet_mat['track_cluster'][n]),1)) + for m in range(len(tracklet_mat['track_cluster'][n])): + track_id = tracklet_mat['track_cluster'][n][m] + t_min_array[m,0] = tracklet_mat['track_interval'][track_id,0] + t_max_array[m,0] = tracklet_mat['track_interval'][track_id,1] + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5,tracklet_mat['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + temp_t_idx = list(range(min_time_cluster_idx,max_time_cluster_idx+1)) + for m in range(len(temp_t_idx)): + if n not in tracklet_mat['time_cluster'][temp_t_idx[m]]: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + + for n in range(len(tracklet_mat['time_cluster'])): + for m in range(len(tracklet_mat['time_cluster'][n])): + cluster_id = tracklet_mat['time_cluster'][n][m] + + if len(tracklet_mat['track_cluster'][cluster_id])==0: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + elif tracklet_mat['track_cluster'][cluster_id][0] in remove_set: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + else: + t_min_array = np.zeros((len(tracklet_mat['track_cluster'][cluster_id]),1)) + t_max_array = np.zeros((len(tracklet_mat['track_cluster'][cluster_id]),1)) + for k in range(len(tracklet_mat['track_cluster'][cluster_id])): + track_id = 
tracklet_mat['track_cluster'][cluster_id][k] + t_min_array[k,0] = tracklet_mat['track_interval'][track_id,0] + t_max_array[k,0] = tracklet_mat['track_interval'][track_id,1] + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5,tracklet_mat['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + temp_t_idx = list(range(min_time_cluster_idx,max_time_cluster_idx+1)) + if n not in temp_t_idx: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + return err_flag + +def TC_tracker(): + M = track_lib.load_detection(det_path, 'MOT_tr') + if os.path.isfile(gt_path)==True: + M_gt = track_lib.load_detection(gt_path, 'MOT_gt') + + ''' + plt.hist(M[:,-1],bins=20)#, bins = list(np.array(range(0,22))/21)) + plt.title("histogram") + plt.show() + import pdb; pdb.set_trace() + ''' + global track_struct + global all_fea_mat + global all_fea_label + track_struct = {'track_params':{}} + track_struct['track_params']['num_fr'] = int(np.max(M[:,0])-np.min(M[:,0])+1) + track_struct['track_params']['IOU_thresh'] = 0.5 + track_struct['track_params']['color_thresh'] = 0.05 + track_struct['track_params']['det_thresh'] = 0 + track_struct['track_params']['det_y_thresh'] = 0 + track_struct['track_params']['det_y_thresh2'] = float("inf") + track_struct['track_params']['det_y_thresh3'] = float("inf") + track_struct['track_params']['det_y_thresh4'] = 0 + track_struct['track_params']['det_h_thresh'] = 0 + track_struct['track_params']['det_h_thresh2'] = float("inf") + track_struct['track_params']['det_ratio_thresh1'] = float("inf") + track_struct['track_params']['linear_pred_thresh'] = 5 + track_struct['track_params']['t_dist_thresh'] = 45 + track_struct['track_params']['track_overlap_thresh'] = 0.1 + track_struct['track_params']['search_radius'] = 1 + track_struct['track_params']['const_fr_thresh'] = 1 + track_struct['track_params']['crop_size'] = 182 + track_struct['track_params']['time_cluster_dist'] = 100 + track_struct['track_params']['merge_IOU'] = 0.7 + track_struct['track_params']['merge_mode'] = 1 + track_struct['track_params']['pre_len'] = 5 + track_struct['track_params']['pre_det_score'] = 1 + track_struct['track_params']['svm_score_flag'] = 0 + track_struct['track_params']['h_score_flag'] = 0 + track_struct['track_params']['y_score_flag'] = 0 + track_struct['track_params']['IOU_gt_flag'] = 0 + track_struct['track_params']['use_F'] = 0 + track_struct['track_params']['cost_bias'] = 0 + track_struct['track_params']['appearance_mode'] = 0 + track_struct['track_params']['use_net'] = 1 + track_struct['track_params']['num_time_cluster'] = int(np.ceil(track_struct['track_params']['num_fr'] + /track_struct['track_params']['time_cluster_dist'])) + track_struct['track_obj'] = {'track_id':[], 'bbox':[], 'det_score':[], 'mean_color':[]} + track_struct['tracklet_mat'] = {'xmin_mat':[], 'ymin_mat':[], 'xmax_mat':[], 'ymax_mat':[], + 'det_score_mat':[]} + + if os.path.isfile(gt_path)==True: + track_struct['tracklet_mat']['conf_matrix_tracklet'] = np.zeros((2,2)) + + track_struct['track_obj']['svm_score'] = [] + track_struct['tracklet_mat']['svm_score_mat'] = [] + + track_struct['track_obj']['h_score'] = [] + track_struct['tracklet_mat']['h_score_mat'] = [] + + track_struct['track_obj']['y_score'] = [] + track_struct['tracklet_mat']['y_score_mat'] = [] + + 
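+    # Note (editor): svm_score / h_score / y_score / IOU_gt are optional
+    # per-detection channels: each one is only filled in when the matching
+    # *_flag in track_params is 1 (see the per-frame loop below, which appends
+    # an empty list otherwise so per-frame indexing stays aligned).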
track_struct['track_obj']['IOU_gt'] = [] + track_struct['tracklet_mat']['IOU_gt_mat'] = [] + + if track_struct['track_params']['use_F']==1: + F_set = loadmat(F_path) + track_struct['tracklet_mat']['F'] = F_set['F_set'] + + img_list = os.listdir(img_folder) + #track_struct['track_params']['num_fr'] = len(img_list) + for n in range(track_struct['track_params']['num_fr']): + + + # fr idx starts from 1 + fr_idx = n+1 + idx = np.where(np.logical_and(M[:,0]==fr_idx,M[:,5]>track_struct['track_params']['det_thresh']))[0] + if len(idx)>1: + choose_idx, _ = track_lib.merge_bbox(M[idx,1:5], track_struct['track_params']['merge_IOU'], M[idx,5], + track_struct['track_params']['merge_mode']) + #import pdb; pdb.set_trace() + temp_M = np.zeros((len(choose_idx),M.shape[1])) + temp_M[:,:] = M[idx[choose_idx],:] + elif len(idx)==1: + temp_M = np.zeros((1,M.shape[1])) + temp_M[0,:] = M[idx,:] + else: + temp_M = [] + + if len(temp_M)!=0: + temp_M = track_lib.remove_det(temp_M, track_struct['track_params']['det_thresh'], + track_struct['track_params']['det_y_thresh'], + track_struct['track_params']['det_h_thresh'], + track_struct['track_params']['det_y_thresh2'], + track_struct['track_params']['det_ratio_thresh1'], + track_struct['track_params']['det_h_thresh2'], + track_struct['track_params']['det_y_thresh3'], + track_struct['track_params']['det_y_thresh4']) + + num_bbox = len(temp_M) + + #img_size = [1920,1080] + + #************************************ + + img_name = track_lib.file_name(fr_idx,file_len)+'.jpg' + if img_name in img_list: + img_path = img_folder+'/'+img_name + img = misc.imread(img_path) + img_size = img.shape + else: + num_bbox = 0 + + if num_bbox!=0: + temp_M[:,1:5] = track_lib.crop_bbox_in_image(temp_M[:,1:5], img_size) + + #************************************ + + track_struct['track_obj']['track_id'].append([]) + if num_bbox==0: + track_struct['track_obj']['bbox'].append([]) + track_struct['track_obj']['det_score'].append([]) + track_struct['track_obj']['mean_color'].append([]) + + track_struct['track_obj']['svm_score'].append([]) + + track_struct['track_obj']['h_score'].append([]) + + track_struct['track_obj']['y_score'].append([]) + + track_struct['track_obj']['IOU_gt'].append([]) + else: + track_struct['track_obj']['bbox'].append(temp_M[:,1:5]) + track_struct['track_obj']['det_score'].append(temp_M[:,5]) + if track_struct['track_params']['svm_score_flag']==1: + track_struct['track_obj']['svm_score'].append(temp_M[:,6]) + else: + track_struct['track_obj']['svm_score'].append([]) + if track_struct['track_params']['h_score_flag']==1: + track_struct['track_obj']['h_score'].append(temp_M[:,7]) + else: + track_struct['track_obj']['h_score'].append([]) + if track_struct['track_params']['y_score_flag']==1: + track_struct['track_obj']['y_score'].append(temp_M[:,8]) + else: + track_struct['track_obj']['y_score'].append([]) + + if track_struct['track_params']['IOU_gt_flag']==1: + track_struct['track_obj']['IOU_gt'].append(temp_M[:,9]) + else: + track_struct['track_obj']['IOU_gt'].append([]) + temp_mean_color = np.zeros((num_bbox,3)) + for k in range(num_bbox): + xmin = int(max(0,temp_M[k,1])) + ymin = int(max(0,temp_M[k,2])) + xmax = int(min(img.shape[1]-1,temp_M[k,1]+temp_M[k,3])) + ymax = int(min(img.shape[0]-1,temp_M[k,2]+temp_M[k,4])) + temp_mean_color[k,0] = np.mean(img[ymin:ymax+1,xmin:xmax+1,0]) + temp_mean_color[k,1] = np.mean(img[ymin:ymax+1,xmin:xmax+1,1]) + temp_mean_color[k,2] = np.mean(img[ymin:ymax+1,xmin:xmax+1,2]) + temp_mean_color = temp_mean_color/255.0 + if 
np.isnan(np.sum(temp_mean_color)): + import pdb; pdb.set_trace() + track_struct['track_obj']['mean_color'].append(temp_mean_color.copy()) + #import pdb; pdb.set_trace() + + #import pdb; pdb.set_trace() + # forward tracking + init_num = 20000 + track_struct['tracklet_mat']['xmin_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymin_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['xmax_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymax_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['det_score_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + if track_struct['track_params']['svm_score_flag']==1: + track_struct['tracklet_mat']['svm_score_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + if track_struct['track_params']['h_score_flag']==1: + track_struct['tracklet_mat']['h_score_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + if track_struct['track_params']['y_score_flag']==1: + track_struct['tracklet_mat']['y_score_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + if track_struct['track_params']['IOU_gt_flag']==1: + track_struct['tracklet_mat']['IOU_gt_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + + max_id = 0 + for n in range(track_struct['track_params']['num_fr']-1): + print(n) + #print(max_id) + if os.path.isfile(gt_path)==True: + track_struct['tracklet_mat'], track_struct['track_obj']['track_id'][n], track_struct['track_obj']['track_id'][n+1], max_id \ + = forward_tracking(track_struct['track_obj']['track_id'][n], track_struct['track_obj']['track_id'][n+1], + track_struct['track_obj']['bbox'][n], track_struct['track_obj']['bbox'][n+1], + track_struct['track_obj']['det_score'][n], track_struct['track_obj']['det_score'][n+1], + track_struct['track_obj']['svm_score'][n], track_struct['track_obj']['svm_score'][n+1], + track_struct['track_obj']['h_score'][n], track_struct['track_obj']['h_score'][n+1], + track_struct['track_obj']['y_score'][n], track_struct['track_obj']['y_score'][n+1], + track_struct['track_obj']['IOU_gt'][n], track_struct['track_obj']['IOU_gt'][n+1], + track_struct['track_obj']['mean_color'][n], track_struct['track_obj']['mean_color'][n+1], + n+2, track_struct['track_params'], track_struct['tracklet_mat'], max_id, M_gt) + else: + track_struct['tracklet_mat'], track_struct['track_obj']['track_id'][n], track_struct['track_obj']['track_id'][n+1], max_id \ + = forward_tracking(track_struct['track_obj']['track_id'][n], track_struct['track_obj']['track_id'][n+1], + track_struct['track_obj']['bbox'][n], track_struct['track_obj']['bbox'][n+1], + track_struct['track_obj']['det_score'][n], track_struct['track_obj']['det_score'][n+1], + track_struct['track_obj']['svm_score'][n], track_struct['track_obj']['svm_score'][n+1], + track_struct['track_obj']['h_score'][n], track_struct['track_obj']['h_score'][n+1], + track_struct['track_obj']['y_score'][n], track_struct['track_obj']['y_score'][n+1], + track_struct['track_obj']['IOU_gt'][n], track_struct['track_obj']['IOU_gt'][n+1], + track_struct['track_obj']['mean_color'][n], track_struct['track_obj']['mean_color'][n+1], + n+2, track_struct['track_params'], track_struct['tracklet_mat'], max_id, []) + + #import pdb; pdb.set_trace() + mask = track_struct['tracklet_mat']['xmin_mat']==-1 + mask = np.sum(mask,axis=1) + 
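+    # Note (editor): the tracklet matrices were over-allocated with
+    # init_num = 20000 rows; any row whose xmin stayed -1 across all num_fr
+    # frames was never assigned a detection, and those rows are deleted below.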
neg_idx = np.where(mask==track_struct['track_params']['num_fr'])[0] + track_struct['tracklet_mat']['xmin_mat'] = np.delete(track_struct['tracklet_mat']['xmin_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['ymin_mat'] = np.delete(track_struct['tracklet_mat']['ymin_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['xmax_mat'] = np.delete(track_struct['tracklet_mat']['xmax_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['ymax_mat'] = np.delete(track_struct['tracklet_mat']['ymax_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['det_score_mat'] = np.delete(track_struct['tracklet_mat']['det_score_mat'], neg_idx, axis=0) + if track_struct['track_params']['svm_score_flag']==1: + track_struct['tracklet_mat']['svm_score_mat'] = np.delete(track_struct['tracklet_mat']['svm_score_mat'], neg_idx, axis=0) + if track_struct['track_params']['h_score_flag']==1: + track_struct['tracklet_mat']['h_score_mat'] = np.delete(track_struct['tracklet_mat']['h_score_mat'], neg_idx, axis=0) + if track_struct['track_params']['y_score_flag']==1: + track_struct['tracklet_mat']['y_score_mat'] = np.delete(track_struct['tracklet_mat']['y_score_mat'], neg_idx, axis=0) + if track_struct['track_params']['IOU_gt_flag']==1: + track_struct['tracklet_mat']['IOU_gt_mat'] = np.delete(track_struct['tracklet_mat']['IOU_gt_mat'], neg_idx, axis=0) + #import pdb; pdb.set_trace() + + + #******************************** + ''' + R_struct = loadmat('D:/Data/Kresimir video/camera pose/R.mat') + T_struct = loadmat('D:/Data/Kresimir video/camera pose/T.mat') + K = np.zeros((3,3)) + K[0,0] = 1662.8 + K[0,2] = 960.5 + K[1,1] = 1662.8 + K[1,2] = 540.5 + K[2,2] = 1 + cam_st = 657-1 + cam_end = 3521-1 + R_set = R_struct['R'][0][cam_st:cam_end+1] + t_set = T_struct['T'][0][cam_st:cam_end+1] + + location_3d_mat = np.zeros((M.shape[0],14)) # fr_id, tracklet_id, x, y, z, s + cnt = 0 + for n in range(len(track_struct['tracklet_mat']['xmin_mat'])): + xmin = track_struct['tracklet_mat']['xmin_mat'][n,cam_st:cam_end+1] + ymin = track_struct['tracklet_mat']['ymin_mat'][n,cam_st:cam_end+1] + xmax = track_struct['tracklet_mat']['xmax_mat'][n,cam_st:cam_end+1] + ymax = track_struct['tracklet_mat']['ymax_mat'][n,cam_st:cam_end+1] + t_idx = np.where(xmin!=-1)[0] + if len(t_idx)==0: + continue + xmin = xmin[t_idx] + ymin = ymin[t_idx] + xmax = xmax[t_idx] + ymax = ymax[t_idx] + + #if n>=35: + # break + + X,X_center = track_lib.localization3D_by_bbox(xmin,ymin,xmax,ymax,K,R_set[t_idx],t_set[t_idx]) + print(n) + + for k in range(len(t_idx)): + location_3d_mat[cnt,0] = int(cam_st+t_idx[k])+1 + location_3d_mat[cnt,1] = n+1 + location_3d_mat[cnt,2:] = X[12*k:12*k+12,0] + #location_3d_mat[cnt,2] = X_center[0,k] + #location_3d_mat[cnt,3] = X_center[1,k] + #location_3d_mat[cnt,4] = X_center[2,k] + #location_3d_mat[cnt,5] = X_center[3,k] + + cnt = cnt+1 + + remove_idx = [] + for n in range(len(location_3d_mat)): + if np.sum(location_3d_mat[n,:])==0: + remove_idx.append(n) + + location_3d_mat = np.delete(location_3d_mat, np.array(remove_idx), axis=0) + np.savetxt('D:/Data/Kresimir video/fish_3d_2.txt', location_3d_mat, delimiter=',',fmt='%1.32e') + import pdb; pdb.set_trace() + ''' + #***************************************** + # tracklet clustering + iters = 20 + track_struct['tracklet_mat'] = preprocessing(track_struct['tracklet_mat'], track_struct['track_params']['pre_len'], + track_struct['track_params']) + + #import pdb; pdb.set_trace() + #pickle.dump(track_struct, open(track_struct_path,'wb')) + #return track_struct + + + + #import pdb; 
pdb.set_trace() + + num_patch, img_size = crop_det(track_struct['tracklet_mat'], track_struct['track_params']['crop_size'], + img_folder, crop_det_folder, 0) + + + if track_struct['track_params']['appearance_mode']==0: + track_struct['tracklet_mat']['appearance_fea_mat'] = feature_extract(feature_size, num_patch, max_length, + crop_det_folder, triplet_model) + elif track_struct['track_params']['appearance_mode']==1: + track_struct['tracklet_mat']['appearance_fea_mat'] = hist_feature_extract(feature_size, num_patch, max_length, + crop_det_folder) + + # remove set + ''' + get_tracklet_scores() + temp_remove_set = remove_tracklet(track_struct['tracklet_mat']) + remove_set.extend(temp_remove_set) + ''' + + #import pdb; pdb.set_trace() + #******************* + track_struct['tracklet_mat']['appearance_fea_mat'][:,2:] = 10*track_struct['tracklet_mat']['appearance_fea_mat'][:,2:] + #track_struct['tracklet_mat']['appearance_fea_mat'][:,2:] = track_struct['tracklet_mat']['appearance_fea_mat'][:,2:] \ + # +np.random.normal(0,0.4,(track_struct['tracklet_mat']['appearance_fea_mat'].shape[0],512)) + + #import pdb; pdb.set_trace() + track_struct['track_params']['img_size'] = img_size + track_struct['tracklet_mat']['comb_track_cost'] = np.zeros((len(track_struct['tracklet_mat']['xmin_mat']), + len(track_struct['tracklet_mat']['xmin_mat']))) + track_struct['tracklet_mat']['comb_track_cost_mask'] = np.zeros((len(track_struct['tracklet_mat']['xmin_mat']), + len(track_struct['tracklet_mat']['xmin_mat']))) + + pickle.dump(track_struct['tracklet_mat']['appearance_fea_mat'], open(appear_mat_path, 'wb')) + #import pdb; pdb.set_trace() + + # load nn + batch_X_x = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_y = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_w = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_h = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_a = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 1]) + batch_mask_1 = tf.placeholder(tf.float32, [None, 1, max_length, 2]) + batch_mask_2 = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 2]) + batch_Y = tf.placeholder(tf.int32, [None, num_classes]) + keep_prob = tf.placeholder(tf.float32) + + y_conv = seq_nn_3d_v2.seq_nn(batch_X_x,batch_X_y,batch_X_w,batch_X_h,batch_X_a,batch_mask_1, + batch_mask_2,batch_Y,max_length,feature_size,keep_prob) + + cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=batch_Y, logits=y_conv)) + train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) + correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(batch_Y, 1)) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + + init = tf.global_variables_initializer() + saver = tf.train.Saver() + + with tf.Session() as sess: + + saver.restore(sess, seq_model) + print("Model restored.") + + #aa = tf.get_collection('h_pool_flat') + #import pdb; pdb.set_trace() + + for n in range(iters): + print("iteration") + print(n) + change_flag = tracklet_clustering(sess, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + if change_flag==0: + #import pdb; pdb.set_trace() + time_check_flag = time_cluster_check() + break + + + #pickle.dump(save_fea_mat, open(save_fea_path, 'wb')) + #pickle.dump(track_set, open(save_label_path,'wb')) + #pickle.dump(remove_set, open(save_remove_path,'wb')) + + ''' + print(np.sum(track_struct['tracklet_mat']['comb_track_cost_mask'])) + global all_fea_mat 
+ global all_fea_label + remove_idx = [] + for n in range(len(all_fea_mat)): + if np.sum(all_fea_mat[n,0,:,1])==0: + remove_idx.append(n) + + all_fea_mat = np.delete(all_fea_mat, np.array(remove_idx), axis=0) + all_fea_label = np.delete(all_fea_label, np.array(remove_idx), axis=0) + + print(len(all_fea_mat)) + #import pdb; pdb.set_trace() + + pickle.dump(all_fea_mat, open(save_all_fea_path,'wb')) + pickle.dump(all_fea_label, open(save_all_label_path,'wb')) + + + + save_batch_size = 5000 + save_batch_num = int(np.ceil(len(all_fea_mat)/save_batch_size)) + for k in range(save_batch_num): + if k!=save_batch_num-1: + temp_fea = all_fea_mat[k*save_batch_size:(k+1)*save_batch_size,:,:,:] + temp_label = all_fea_label[k*save_batch_size:(k+1)*save_batch_size,:] + else: + temp_fea = all_fea_mat[k*save_batch_size:,:,:,:] + temp_label = all_fea_label[k*save_batch_size:,:] + temp_fea_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+'_all'+str(k)+'.obj' + temp_label_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+'_all_label'+str(k)+'.obj' + pickle.dump(temp_fea, open(temp_fea_path,'wb')) + pickle.dump(temp_label, open(temp_label_path,'wb')) + ''' + + post_processing() + + pickle.dump(track_struct, open(track_struct_path,'wb')) + + wrt_txt(track_struct['final_tracklet_mat']) + + draw_result(img_folder, tracking_img_folder) + + convert_frames_to_video(tracking_img_folder+'/', tracking_video_path, 20) + + return track_struct + +def check_classfication(): + track_struct = pickle.load(open(track_struct_path,'rb')) + #import pdb; pdb.set_trace() + appear_mat = track_struct['tracklet_mat']['appearance_fea_mat'].copy() + app_cost = np.zeros((track_struct['tracklet_mat']['comb_track_cost_mask'].shape[0], + track_struct['tracklet_mat']['comb_track_cost_mask'].shape[1])) + label_mat = np.zeros((track_struct['tracklet_mat']['comb_track_cost_mask'].shape[0], + track_struct['tracklet_mat']['comb_track_cost_mask'].shape[1])) + + conf_M = np.zeros((2,2)) + M = track_lib.load_detection(gt_path, 'MOT_gt') + total_bbox = np.zeros((len(M),4)) + total_bbox[:,0] = M[:,1] + total_bbox[:,1] = M[:,2] + total_bbox[:,2] = M[:,3] + total_bbox[:,3] = M[:,4] + for n1 in range(track_struct['tracklet_mat']['comb_track_cost_mask'].shape[0]-1): + print(n1) + for n2 in range(n1+1,track_struct['tracklet_mat']['comb_track_cost_mask'].shape[1]): + if track_struct['tracklet_mat']['comb_track_cost_mask'][n1,n2]==0: + continue + idx1 = np.where(appear_mat[:,0]==n1+1)[0] + idx2 = np.where(appear_mat[:,0]==n2+1)[0] + idx1 = np.array(idx1,dtype=int) + idx2 = np.array(idx2,dtype=int) + X1 = appear_mat[idx1,2:] + X2 = appear_mat[idx2,2:] + temp_dist = spatial.distance.cdist(X1, X2, 'euclidean') + app_cost[n1,n2] = np.min(temp_dist) + + fr_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n1,:]!=-1)[0] + bbox1 = np.zeros((len(fr_idx),4)) + bbox1[:,0] = track_struct['tracklet_mat']['xmin_mat'][n1,fr_idx] + bbox1[:,1] = track_struct['tracklet_mat']['ymin_mat'][n1,fr_idx] + bbox1[:,2] = track_struct['tracklet_mat']['xmax_mat'][n1,fr_idx]-track_struct['tracklet_mat']['xmin_mat'][n1,fr_idx]+1 + bbox1[:,3] = track_struct['tracklet_mat']['ymax_mat'][n1,fr_idx]-track_struct['tracklet_mat']['ymin_mat'][n1,fr_idx]+1 + + obj_id1 = [] + for k in range(len(bbox1)): + temp_bbox1 = np.zeros((1,4)) + temp_bbox1[:,:] = bbox1[k,:] + temp_idx = np.where(M[:,0]==fr_idx[k]+1)[0] + temp_bbox = np.zeros((len(temp_idx),4)) + temp_bbox[:,:] = M[temp_idx,1:5] + overlap_mat,_,_,_ = track_lib.get_overlap(temp_bbox1, temp_bbox) + #import pdb; pdb.set_trace() + idx = 
np.where(overlap_mat[0,:]==np.max(overlap_mat[0,:]))[0] + #if len(idx)==0: + # import pdb; pdb.set_trace() + idx = idx[0] + obj_id1.append(M[temp_idx[idx],5]) + c = Counter(obj_id1) + id1, count = c.most_common()[0] + + fr_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n2,:]!=-1)[0] + bbox2 = np.zeros((len(fr_idx),4)) + bbox2[:,0] = track_struct['tracklet_mat']['xmin_mat'][n2,fr_idx] + bbox2[:,1] = track_struct['tracklet_mat']['ymin_mat'][n2,fr_idx] + bbox2[:,2] = track_struct['tracklet_mat']['xmax_mat'][n2,fr_idx]-track_struct['tracklet_mat']['xmin_mat'][n2,fr_idx]+1 + bbox2[:,3] = track_struct['tracklet_mat']['ymax_mat'][n2,fr_idx]-track_struct['tracklet_mat']['ymin_mat'][n2,fr_idx]+1 + + obj_id2 = [] + for k in range(len(bbox2)): + temp_bbox2 = np.zeros((1,4)) + temp_bbox2[:,:] = bbox2[k,:] + temp_idx = np.where(M[:,0]==fr_idx[k]+1)[0] + temp_bbox = np.zeros((len(temp_idx),4)) + temp_bbox[:,:] = M[temp_idx,1:5] + overlap_mat,_,_,_ = track_lib.get_overlap(temp_bbox2, temp_bbox) + idx = np.where(overlap_mat[0,:]==np.max(overlap_mat[0,:]))[0] + idx = idx[0] + obj_id2.append(M[temp_idx[idx],5]) + c = Counter(obj_id2) + #import pdb; pdb.set_trace() + id2, count = c.most_common()[0] + + if id1==id2: + label_mat[n1,n2] = 1 + else: + label_mat[n1,n2] = -1 + cost1 = track_struct['tracklet_mat']['comb_track_cost'][label_mat!=0] + cost2 = app_cost[label_mat!=0] + labels = label_mat[label_mat!=0] + pred_label1 = np.zeros(cost1.shape[0]) + pred_label1[cost1<0] = 1 + pred_label1[pred_label1==0] = -1 + for n in range(cost1.shape[0]): + if labels[n]==1 and pred_label1[n]==1: + conf_M[0,0] = conf_M[0,0]+1 + elif labels[n]==1 and pred_label1[n]==-1: + conf_M[0,1] = conf_M[0,1]+1 + elif labels[n]==-1 and pred_label1[n]==1: + conf_M[1,0] = conf_M[1,0]+1 + else: + conf_M[1,1] = conf_M[1,1]+1 + #acc1 = (conf_M[0,0]+conf_M[1,1])/np.sum(conf_M) + thresh = np.array(range(-10,10))/4 + acc1 = np.zeros(len(thresh)) + for n in range(len(thresh)): + pred_label1 = np.zeros(cost1.shape[0]) + pred_label1[cost1overlap_thresh: + cand_mask[cand_idx[n]] = 0 + + # get mean color of detection + cand_idx = np.where(cand_mask[:,0]==1)[0] + mean_color_mat = np.zeros((num_det,3)) + for n in range(len(cand_idx)): + xmin = int(max(0,M[cand_idx[n],1])) + ymin = int(max(0,M[cand_idx[n],2])) + xmax = int(min(img_size[1]-1,M[cand_idx[n],1]+M[cand_idx[n],3])) + ymax = int(min(img_size[0]-1,M[cand_idx[n],2]+M[cand_idx[n],4])) + mean_color_mat[cand_idx[n],0] = np.mean(img[ymin:ymax+1,xmin:xmax+1,0]) + mean_color_mat[cand_idx[n],1] = np.mean(img[ymin:ymax+1,xmin:xmax+1,1]) + mean_color_mat[cand_idx[n],2] = np.mean(img[ymin:ymax+1,xmin:xmax+1,2]) + + # assign detection to track + tracklet_mat = track_struct['final_tracklet_mat'].copy() + num_track = len(tracklet_mat['xmin_mat']) + det_to_track_overlap = np.zeros((len(cand_idx),num_track)) + det_to_track_mask = np.zeros((len(cand_idx),num_track)) + det_to_track_dist = np.zeros((len(cand_idx),num_track)) + for n in range(len(cand_idx)): + fr_idx = M[cand_idx[n],0] + for m in range(len(tracklet_mat['xmin_mat'])): + non_neg_idx = np.where(tracklet_mat['xmin_mat'][m,:]!=-1)[0] + t_min = np.min(non_neg_idx) + t_max = np.max(non_neg_idx) + if fr_idx-1>=t_min and fr_idx-1<=t_max: + continue + det_to_track_dist[n,m] = min(abs(fr_idx-1-t_min),abs(fr_idx-1-t_max)) + det_to_track_mask[n,m] = 1 + track_bbox = np.zeros((1,4)) + if abs(fr_idx-1-t_min)0: + cnt = cnt+1 + else: + break + num_len = cnt + for n in range(length-num_len): + if n==0: + out_str = '0' + else: + out_str = out_str+'0' + if 
length-num_len>0: + return out_str+str(num) + else: + return str(num) + +#bbox = [x, y, w, h] +def get_IOU(bbox1, bbox2): + area1 = bbox1[2]*bbox1[3] + area2 = bbox2[2]*bbox2[3] + x1 = max(bbox1[0], bbox2[0]) + y1 = max(bbox1[1], bbox2[1]) + x2 = min(bbox1[0]+bbox1[2]-1, bbox2[0]+bbox2[2]-1) + y2 = min(bbox1[1]+bbox1[3]-1, bbox2[1]+bbox2[3]-1) + + #import pdb; pdb.set_trace() + overlap_area = max(0, (x2-x1+1))*max(0, (y2-y1+1)) + ratio = overlap_area/(area1+area2-overlap_area) + return ratio + +def get_overlap(bbox1, bbox2): + num1 = bbox1.shape[0] + num2 = bbox2.shape[0] + overlap_mat = np.zeros((num1, num2)) + for n in range(num1): + for m in range(num2): + + #import pdb; pdb.set_trace() + overlap_mat[n,m] = get_IOU(bbox1[n,:], bbox2[m,:]) + + return overlap_mat + +def load_detection(file_name, dataset): + + # M=[fr_id (from 1), x, y, w, h, det_score] + if dataset=='Underwater': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 6)) + M[:,0] = f[:,0]+1 + M[:,1:5] = f[:,1:5] + M[:,5] = f[:,5] + M[:,3] = M[:,3]-M[:,1]+1 + M[:,4] = M[:,4]-M[:,2]+1 + return M + if dataset=='UA-Detrac': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 6)) + M[:,0] = f[:,0] + M[:,1:6] = f[:,2:7] + #import pdb; pdb.set_trace() + return M + if dataset=='KITTI': + f = np.loadtxt(det_path,delimiter=' ',dtype='str') + mask = np.zeros((len(f),1)) + for n in range(len(f)): + if f[n][2]=='Car' or f[n][2]=='Van': + mask[n,0] = 1 + num = int(np.sum(mask)) + M = np.zeros((num, 6)) + cnt = 0 + for n in range(len(f)): + if mask[n,0]==1: + M[cnt,0] = int(float(f[n][0]))+1 + M[cnt,1] = int(float(f[n][6])) + M[cnt,2] = int(float(f[n][7])) + M[cnt,3] = int(float(f[n][8]))-int(float(f[n][6]))+1 + M[cnt,4] = int(float(f[n][9]))-int(float(f[n][7]))+1 + M[cnt,5] = float(f[n][17]) + cnt = cnt+1 + + #import pdb; pdb.set_trace() + return M + if dataset=='MOT': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + M = np.zeros((f.shape[0], 6)) + M[:,0] = f[:,0] + M[:,1:6] = f[:,2:7] + #import pdb; pdb.set_trace() + return M + if dataset=='KITTI_3d': + f = np.loadtxt(file_name, delimiter=',') + f = np.array(f) + mask = np.zeros((len(f),1)) + for n in range(len(f)): + # only for pedestrian + if f[n][7]==4 or f[n][7]==5 or f[n][7]==6: + mask[n,0] = 1 + num = int(np.sum(mask)) + + M = np.zeros((num, 10)) + cnt = 0 + for n in range(len(f)): + if mask[n,0]==1: + M[cnt,0] = int(float(f[n][0]))+1 + M[cnt,1] = int(float(f[n][2])) + M[cnt,2] = int(float(f[n][3])) + M[cnt,3] = int(float(f[n][4])) + M[cnt,4] = int(float(f[n][5])) + M[cnt,5] = 1.0 + M[cnt,6] = float(f[n][8]) + M[cnt,7] = float(f[n][9]) + M[cnt,8] = float(f[n][10]) + M[cnt,9] = float(f[n][11]) + cnt = cnt+1 + #import pdb; pdb.set_trace() + return M + + if dataset=='KITTI_3d_2': + f = np.loadtxt(file_name, dtype=str, delimiter=',') + f = np.array(f) + mask = np.zeros((len(f),1)) + for n in range(len(f)): + # only for pedestrian + if f[n][11]=="Pedestrian" or f[n][11]=="Cyclist": + mask[n,0] = 1 + num = int(np.sum(mask)) + + M = np.zeros((num, 10)) + cnt = 0 + for n in range(len(f)): + if mask[n,0]==1: + M[cnt,0] = int(float(f[n][0]))+1 + M[cnt,1] = int(float(f[n][1])) + M[cnt,2] = int(float(f[n][2])) + M[cnt,3] = int(float(f[n][3])) + M[cnt,4] = int(float(f[n][4])) + M[cnt,5] = float(f[n][10])/100.0 + M[cnt,6] = float(f[n][5])/723.0 + M[cnt,7] = float(f[n][7])/723.0 + M[cnt,8] = float(f[n][8])/723.0 + M[cnt,9] = float(f[n][9])/723.0 + cnt = cnt+1 + #import pdb; pdb.set_trace() + return M + +def 
bbox_associate(overlap_mat, IOU_thresh): + idx1 = [] + idx2 = [] + while 1: + idx = np.unravel_index(np.argmax(overlap_mat, axis=None), overlap_mat.shape) + if overlap_mat[idx]IOU_thresh: + if s1>s2: + cand_idx[n2] = 0 + else: + cand_idx[n1] = 0 + idx = np.where(cand_idx==1)[0] + new_bbox = bbox[idx,:] + return idx, new_bbox + +def preprocessing(tracklet_mat, len_thresh): + new_tracklet_mat = tracklet_mat + N_tracklet = new_tracklet_mat['xmin_mat'].shape[0] + remove_idx = [] + for n in range(N_tracklet): + idx = np.where(new_tracklet_mat['xmin_mat'][n,:]!=-1)[0] + max_det_score = np.max(new_tracklet_mat['det_score_mat'][n,idx]) + if len(idx)0: + num1 = bbox1.shape[0] + else: + num1 = 0 + if len(bbox2)>0: + num2 = bbox2.shape[0] + else: + num2 = 0 + + new_track_id1 = track_id1 + new_tracklet_mat = tracklet_mat + if fr_idx2==2 and num1>0: + new_track_id1 = list(range(1,num1+1)) + ''' + new_tracklet_mat['xmin_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['ymin_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['xmax_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['ymax_mat'] = -np.ones((num1, num_fr)) + new_tracklet_mat['det_score_mat'] = -np.ones((num1, num_fr)) + ''' + new_tracklet_mat['xmin_mat'][0:num1,0] = bbox1[:,0] + new_tracklet_mat['ymin_mat'][0:num1,0] = bbox1[:,1] + new_tracklet_mat['xmax_mat'][0:num1,0] = bbox1[:,0]+bbox1[:,2]-1 + new_tracklet_mat['ymax_mat'][0:num1,0] = bbox1[:,1]+bbox1[:,3]-1 + new_tracklet_mat['det_score_mat'][0:num1,0] = det_score1 + new_tracklet_mat['x_3d_mat'][0:num1,0] = bbox_3d_1[:,0] + new_tracklet_mat['y_3d_mat'][0:num1,0] = bbox_3d_1[:,1] + new_tracklet_mat['w_3d_mat'][0:num1,0] = bbox_3d_1[:,2] + new_tracklet_mat['h_3d_mat'][0:num1,0] = bbox_3d_1[:,3] + max_id = num1 + + if len(bbox1)==0 and len(bbox2)!=0: + idx1 = [] + idx2 = [] + elif len(bbox1)!=0 and len(bbox2)==0: + idx1 = [] + idx2 = [] + elif len(bbox1)==0 and len(bbox2)==0: + idx1 = [] + idx2 = [] + elif len(bbox1)!=0 and len(bbox2)!=0: + # pred bbox1 + pred_bbox1 = np.zeros((len(bbox1),4)) + for k in range(len(bbox1)): + temp_track_id = new_track_id1[k]-1 + t_idx = np.where(new_tracklet_mat['xmin_mat'][temp_track_id,:]!=-1)[0] + t_min = np.min(t_idx) + if t_mincolor_thresh] = 0 + idx1, idx2 = bbox_associate(overlap_mat, track_params['IOU_thresh']) + + if len(idx1)==0 and num2>0: + new_track_id2 = list(np.array(range(1,num2+1))+max_id) + ''' + new_tracklet_mat['xmin_mat'] = \ + np.append(new_tracklet_mat['xmin_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['ymin_mat'] = \ + np.append(new_tracklet_mat['ymin_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['xmax_mat'] = \ + np.append(new_tracklet_mat['xmax_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['ymax_mat'] = \ + np.append(new_tracklet_mat['ymax_mat'], -np.ones((num2,num_fr)), axis=0) + new_tracklet_mat['det_score_mat'] = \ + np.append(new_tracklet_mat['det_score_mat'], -np.ones((num2,num_fr)), axis=0) + ''' + max_id = max_id+num2 + new_tracklet_mat['xmin_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,0] + new_tracklet_mat['ymin_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,1] + new_tracklet_mat['xmax_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,0]+bbox2[:,2]-1 + new_tracklet_mat['ymax_mat'][max_id-num2:max_id,fr_idx2-1] = bbox2[:,1]+bbox2[:,3]-1 + new_tracklet_mat['det_score_mat'][max_id-num2:max_id,fr_idx2-1] = det_score2 + new_tracklet_mat['x_3d_mat'][max_id-num2:max_id,fr_idx2-1] = bbox_3d_2[:,0] + new_tracklet_mat['y_3d_mat'][max_id-num2:max_id,fr_idx2-1] = bbox_3d_2[:,1] + 
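+        # (editor) the *_3d_mat channels run parallel to xmin/ymin/xmax/ymax
+        # and store the x, y, w, h values later fed to the sequence model
+        # (scaled by loc_scales in comb_cost).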
new_tracklet_mat['w_3d_mat'][max_id-num2:max_id,fr_idx2-1] = bbox_3d_2[:,2] + new_tracklet_mat['h_3d_mat'][max_id-num2:max_id,fr_idx2-1] = bbox_3d_2[:,3] + elif len(idx1)>0: + new_track_id2 = [] + for n in range(num2): + #import pdb; pdb.set_trace() + temp_idx = np.where(idx2==n)[0] + if len(temp_idx)==0: + max_id = max_id+1 + new_track_id2.append(max_id) + ''' + new_tracklet_mat['xmin_mat'] = \ + np.append(new_tracklet_mat['xmin_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['ymin_mat'] = \ + np.append(new_tracklet_mat['ymin_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['xmax_mat'] = \ + np.append(new_tracklet_mat['xmax_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['ymax_mat'] = \ + np.append(new_tracklet_mat['ymax_mat'], -np.ones((1,num_fr)), axis=0) + new_tracklet_mat['det_score_mat'] = \ + np.append(new_tracklet_mat['det_score_mat'], -np.ones((1,num_fr)), axis=0) + ''' + #import pdb; pdb.set_trace() + new_tracklet_mat['xmin_mat'][max_id-1,fr_idx2-1] = bbox2[n,0] + new_tracklet_mat['ymin_mat'][max_id-1,fr_idx2-1] = bbox2[n,1] + new_tracklet_mat['xmax_mat'][max_id-1,fr_idx2-1] = bbox2[n,0]+bbox2[n,2]-1 + new_tracklet_mat['ymax_mat'][max_id-1,fr_idx2-1] = bbox2[n,1]+bbox2[n,3]-1 + new_tracklet_mat['det_score_mat'][max_id-1,fr_idx2-1] = det_score2[n] + new_tracklet_mat['x_3d_mat'][max_id-1,fr_idx2-1] = bbox_3d_2[n,0] + new_tracklet_mat['y_3d_mat'][max_id-1,fr_idx2-1] = bbox_3d_2[n,1] + new_tracklet_mat['w_3d_mat'][max_id-1,fr_idx2-1] = bbox_3d_2[n,2] + new_tracklet_mat['h_3d_mat'][max_id-1,fr_idx2-1] = bbox_3d_2[n,3] + else: + temp_idx = temp_idx[0] + new_track_id2.append(new_track_id1[idx1[temp_idx]]) + new_tracklet_mat['xmin_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox2[n,0] + new_tracklet_mat['ymin_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox2[n,1] + new_tracklet_mat['xmax_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox2[n,0]+bbox2[n,2]-1 + new_tracklet_mat['ymax_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox2[n,1]+bbox2[n,3]-1 + new_tracklet_mat['det_score_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = det_score2[n] + new_tracklet_mat['x_3d_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox_3d_2[n,0] + new_tracklet_mat['y_3d_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox_3d_2[n,1] + new_tracklet_mat['w_3d_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox_3d_2[n,2] + new_tracklet_mat['h_3d_mat'] \ + [new_track_id1[idx1[temp_idx]]-1,fr_idx2-1] = bbox_3d_2[n,3] + else: + new_track_id2 = [] + + #import pdb; pdb.set_trace() + new_max_id = max_id + return new_tracklet_mat, new_track_id1, new_track_id2, new_max_id + +def init_clustering(): + + global track_struct + + N_tracklet = track_struct['tracklet_mat']['xmin_mat'].shape[0] + + # track interval + track_struct['tracklet_mat']['track_interval'] = np.zeros((N_tracklet, 2)) + + # track cluster + track_struct['tracklet_mat']['track_cluster'] = [] + + # track class + track_struct['tracklet_mat']['track_class'] = np.arange(N_tracklet, dtype=int) + + # time cluster + track_struct['tracklet_mat']['time_cluster'] = [] + for n in range(track_struct['track_params']['num_time_cluster']): + track_struct['tracklet_mat']['time_cluster'].append([]) + + track_struct['tracklet_mat']['track_cluster_t_idx'] = [] + for n in range(N_tracklet): + idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n,:]!=-1)[0] + track_struct['tracklet_mat']['track_interval'][n,0] = np.min(idx) + 
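+        # (editor) track_interval[n] = [first frame, last frame] of tracklet n,
+        # 0-based and inclusive, taken from the frames where xmin_mat != -1.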
track_struct['tracklet_mat']['track_interval'][n,1] = np.max(idx) + track_struct['tracklet_mat']['track_cluster'].append([n]) + + if n in remove_set: + track_struct['tracklet_mat']['track_cluster_t_idx'].append([-1]) + else: + min_time_cluster_idx = int(np.floor(max(track_struct['tracklet_mat']['track_interval'][n,0] + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(track_struct['tracklet_mat']['track_interval'][n,1] + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + track_struct['tracklet_mat']['track_cluster_t_idx'].append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + for k in range(min_time_cluster_idx,max_time_cluster_idx+1): + track_struct['tracklet_mat']['time_cluster'][k].append(n) + + # get center position of each detection location + mask = track_struct['tracklet_mat']['xmin_mat']==-1 + track_struct['tracklet_mat']['center_x'] = \ + (track_struct['tracklet_mat']['xmin_mat']+track_struct['tracklet_mat']['xmax_mat'])/2 + track_struct['tracklet_mat']['center_y'] = \ + (track_struct['tracklet_mat']['ymin_mat']+track_struct['tracklet_mat']['ymax_mat'])/2 + track_struct['tracklet_mat']['w'] = \ + track_struct['tracklet_mat']['xmax_mat']-track_struct['tracklet_mat']['xmin_mat']+1 + track_struct['tracklet_mat']['h'] = \ + track_struct['tracklet_mat']['ymax_mat']-track_struct['tracklet_mat']['ymin_mat']+1 + track_struct['tracklet_mat']['center_x'][mask] = -1 + track_struct['tracklet_mat']['center_y'][mask] = -1 + track_struct['tracklet_mat']['w'][mask] = -1 + track_struct['tracklet_mat']['h'][mask] = -1 + + # neighbor_track_idx and conflict_track_idx + track_struct['tracklet_mat']['neighbor_track_idx'] = [] + track_struct['tracklet_mat']['conflict_track_idx'] = [] + for n in range(N_tracklet): + track_struct['tracklet_mat']['neighbor_track_idx'].append([]) + track_struct['tracklet_mat']['conflict_track_idx'].append([]) + for n in range(N_tracklet-1): + for m in range(n+1, N_tracklet): + t_min1 = track_struct['tracklet_mat']['track_interval'][n,0] + t_max1 = track_struct['tracklet_mat']['track_interval'][n,1] + t_min2 = track_struct['tracklet_mat']['track_interval'][m,0] + t_max2 = track_struct['tracklet_mat']['track_interval'][m,1] + overlap_len = min(t_max2,t_max1)-max(t_min1,t_min2)+1 + overlap_r = overlap_len/(t_max1-t_min1+1+t_max2-t_min2+1-overlap_len) + if overlap_len>0 and overlap_r>track_struct['track_params']['track_overlap_thresh']: + track_struct['tracklet_mat']['conflict_track_idx'][n].append(m) + track_struct['tracklet_mat']['conflict_track_idx'][m].append(n) + continue + if overlap_len>0 and overlap_r<=track_struct['track_params']['track_overlap_thresh']: + # check the search region + t1 = int(max(t_min1,t_min2)) + t2 = int(min(t_max2,t_max1)) + if (t_min1<=t_min2 and t_max1>=t_max2) or (t_min1>=t_min2 and t_max1<=t_max2) or overlap_len>4: + track_struct['tracklet_mat']['conflict_track_idx'][n].append(m) + track_struct['tracklet_mat']['conflict_track_idx'][m].append(n) + continue + + cand_t = np.array(range(t1,t2+1)) + dist_x = abs(track_struct['tracklet_mat']['center_x'][n,cand_t] \ + -track_struct['tracklet_mat']['center_x'][m,cand_t]) + dist_y = abs(track_struct['tracklet_mat']['center_y'][n,cand_t] \ + -track_struct['tracklet_mat']['center_y'][m,cand_t]) + w1 = track_struct['tracklet_mat']['w'][n,cand_t] + h1 = 
track_struct['tracklet_mat']['h'][n,cand_t] + w2 = track_struct['tracklet_mat']['w'][m,cand_t] + h2 = track_struct['tracklet_mat']['h'][m,cand_t] + + min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)]) + min_dist_x1 = np.min(dist_x/min_len) + min_dist_y1 = np.min(dist_y/min_len) + min_dist_x2 = np.min(dist_x/min_len) + min_dist_y2 = np.min(dist_y/min_len) + if min_dist_x1=t_max2: + t1 = int(t_min1) + t2 = int(t_max2) + else: + t1 = int(t_max1) + t2 = int(t_min2) + + #*********************************** + tr_t1 = np.array(range(int(t_min1),int(t_max1+1))) + tr_x1 = track_struct['tracklet_mat']['center_x'][n,int(t_min1):int(t_max1+1)] + tr_y1 = track_struct['tracklet_mat']['center_y'][n,int(t_min1):int(t_max1+1)] + if len(tr_t1)>10: + if t_min1>=t_max2: + tr_t1 = tr_t1[0:10] + tr_x1 = tr_x1[0:10] + tr_y1 = tr_y1[0:10] + else: + tr_t1 = tr_t1[-10:] + tr_x1 = tr_x1[-10:] + tr_y1 = tr_y1[-10:] + ts_x1 = linear_pred_v2(tr_t1, tr_x1, np.array([t2])) + ts_y1 = linear_pred_v2(tr_t1, tr_y1, np.array([t2])) + dist_x1 = abs(ts_x1[0]-track_struct['tracklet_mat']['center_x'][m,t2]) + dist_y1 = abs(ts_y1[0]-track_struct['tracklet_mat']['center_y'][m,t2]) + + tr_t2 = np.array(range(int(t_min2),int(t_max2+1))) + tr_x2 = track_struct['tracklet_mat']['center_x'][m,int(t_min2):int(t_max2+1)] + tr_y2 = track_struct['tracklet_mat']['center_y'][m,int(t_min2):int(t_max2+1)] + if len(tr_t2)>10: + if t_min2>t_max1: + tr_t2 = tr_t2[0:10] + tr_x2 = tr_x2[0:10] + tr_y2 = tr_y2[0:10] + else: + tr_t2 = tr_t2[-10:] + tr_x2 = tr_x2[-10:] + tr_y2 = tr_y2[-10:] + + ts_x2 = linear_pred_v2(tr_t2, tr_x2, np.array([t1])) + ts_y2 = linear_pred_v2(tr_t2, tr_y2, np.array([t1])) + dist_x2 = abs(ts_x2[0]-track_struct['tracklet_mat']['center_x'][n,t1]) + dist_y2 = abs(ts_y2[0]-track_struct['tracklet_mat']['center_y'][n,t1]) + + dist_x = min(dist_x1, dist_x2) + dist_y = min(dist_y1, dist_y2) + #*********************************** + + #import pdb; pdb.set_trace() + ''' + dist_x = abs(track_struct['tracklet_mat']['center_x'][n,t1] \ + -track_struct['tracklet_mat']['center_x'][m,t2]) + dist_y = abs(track_struct['tracklet_mat']['center_y'][n,t1] \ + -track_struct['tracklet_mat']['center_y'][m,t2]) + ''' + + w1 = track_struct['tracklet_mat']['w'][n,t1] + h1 = track_struct['tracklet_mat']['h'][n,t1] + w2 = track_struct['tracklet_mat']['w'][m,t2] + h2 = track_struct['tracklet_mat']['h'][m,t2] + + min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)]) + min_dist_x1 = dist_x/min_len + min_dist_y1 = dist_y/min_len + min_dist_x2 = dist_x/min_len + min_dist_y2 = dist_y/min_len + + if min_dist_x160: + continue + + if track_set[n,0] not in track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]]: + track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]].append(track_set[n,0]) + track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,0]].append(track_set[n,1]) + if track_set[n,0] in track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]]: + track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]].remove(track_set[n,0]) + track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,0]].remove(track_set[n,1]) + + else: + if track_set[n,0] in track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]]: + track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]].remove(track_set[n,0]) + track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,0]].remove(track_set[n,1]) + if track_set[n,0] not in 
track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]]: + track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,1]].append(track_set[n,0]) + track_struct['tracklet_mat']['conflict_track_idx'][track_set[n,0]].append(track_set[n,1]) + + + # cluster cost + track_struct['tracklet_mat']['cluster_cost'] = [] + for n in range(N_tracklet): + track_struct['tracklet_mat']['cluster_cost'].append(0) + + # save all comb cost for two tracklets + # comb_track_cost [track_id1, track_id2, cost] + # track_struct['tracklet_mat']['comb_track_cost'] = [] + + # save feature mat for training + ''' + if len(track_struct['tracklet_mat']['track_set'])>0: + track_struct['tracklet_mat']['save_fea_mat'] = np.zeros((len(track_struct['tracklet_mat']['track_set']), feature_size, max_length, 2)) + else: + track_struct['tracklet_mat']['save_fea_mat'] = [] + ''' + return + +def comb_cost(tracklet_set, feature_size, max_length, img_size, sess, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #comb_track_cost = np.array(tracklet_mat['comb_track_cost'].copy()) + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + #track_set = tracklet_mat['track_set'].copy() + + global track_struct + global all_fea_mat + global all_fea_label + #import pdb; pdb.set_trace() + tracklet_mat = track_struct['tracklet_mat'] + loc_scales = track_struct['track_params']['loc_scales'] + + temp_sum = np.sum(all_fea_mat[:,4,:,1], axis=1) + if len(np.where(temp_sum!=0)[0])==0: + fea_id = 0 + else: + fea_id = int(np.max(np.where(temp_sum!=0)[0]))+1 + + + #print(fea_id) + #import pdb; pdb.set_trace() + # cnn classifier + N_tracklet = len(tracklet_set) + track_interval = tracklet_mat['track_interval'] + sort_idx = np.argsort(track_interval[np.array(tracklet_set),1]) + cost = 0 + if len(sort_idx)<=1: + return cost + + + remove_ids = [] + #comb_fea_mat = np.zeros((len(sort_idx)-1,feature_size,max_length,2)) + #comb_fea_label = np.zeros((len(sort_idx)-1,4)) + + comb_fea_mat = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),feature_size,max_length,3)) + comb_fea_label = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),4)) + + temp_cost_list = [] + #print(len(comb_track_cost)) + cnt = -1 + for n in range(0, len(sort_idx)-1): + for kk in range(n+1,len(sort_idx)): + cnt = int(cnt+1) + track_id1 = tracklet_set[sort_idx[n]] + track_id2 = tracklet_set[sort_idx[kk]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + remove_ids.append(cnt) + continue + + if tracklet_mat['comb_track_cost_mask'][track_id1,track_id2]==1: + cost = cost+tracklet_mat['comb_track_cost'][track_id1,track_id2] + remove_ids.append(cnt) + continue + + comb_fea_label[cnt,0] = track_id1 + comb_fea_label[cnt,1] = track_id2 + + #if track_id1==32 and track_id2==46: + # import pdb; pdb.set_trace() + ''' + start_time = time.time() + if len(comb_track_cost)>0: + search_idx = np.where(np.logical_and(comb_track_cost[:,0]==track_id1, comb_track_cost[:,1]==track_id2)) + if len(search_idx[0])>0: + remove_ids.append(n) + #import pdb; pdb.set_trace() + cost = cost+comb_track_cost[search_idx[0][0],2] + elapsed_time = time.time() - start_time + print(elapsed_time) + continue + ''' + temp_cost_list.append([track_id1,track_id2]) + + + # t starts from 0 + #import pdb; pdb.set_trace() + t1_min = int(track_interval[track_id1,0]) + t1_max = int(track_interval[track_id1,1]) + t2_min = int(track_interval[track_id2,0]) + t2_max = 
int(track_interval[track_id2,1]) + t_min = int(min(t1_min,t2_min)) + t_max = int(max(t1_max,t2_max)) + + if t_max-t_min+1<=max_length: + comb_fea_mat[cnt,:,t1_min-t_min:t1_max-t_min+1,1] = 1 + comb_fea_mat[cnt,0,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['x_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['y_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['w_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['h_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1+1)[0] + + if comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + comb_fea_mat[cnt,:,t2_min-t_min:t2_max-t_min+1,2] = 1 + + comb_fea_mat[cnt,0,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['x_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['y_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['w_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['h_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2+1)[0] + if comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + + comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + else: + t_len1 = t1_max-t1_min+1 + t_len2 = t2_max-t2_min+1 + t_len_min = min(t_len1,t_len2) + mid_t = int(0.5*(t1_max+t2_min)) + if mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1<=0.5*max_length: + t2_end = t2_max + t1_start = t2_end-max_length+1 + #t1_start = mid_t-int(0.5*max_length)+1 + #t2_end = t1_start+max_length-1 + elif mid_t-t1_min+1<=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = t1_min + t2_end = t1_start+max_length-1 + else: # mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = mid_t-int(0.5*max_length)+1 + t2_end = t1_start+max_length-1 + + comb_fea_mat[cnt,:,0:t1_max-t1_start+1,1] = 1 + if comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmax_mat'][track_id1,t1_start:t1_max+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0] = tracklet_mat['x_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,0:t1_max-t1_start+1,0] = tracklet_mat['y_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,0:t1_max-t1_start+1,0] = tracklet_mat['w_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,0:t1_max-t1_start+1,0] = tracklet_mat['h_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1+1)[0] + cand_idx = cand_idx[t1_start-t1_min:] + comb_fea_mat[cnt,4:,0:t1_max-t1_start+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + comb_fea_mat[cnt,:,t2_min-t1_start:t2_end-t1_start+1,2] = 1 + if 
comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmin_mat'][track_id2,t2_min:t2_end+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['x_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[0] + comb_fea_mat[cnt,1,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['y_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[1] + comb_fea_mat[cnt,2,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['w_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[2] + comb_fea_mat[cnt,3,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['h_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2+1)[0] + #import pdb; pdb.set_trace() + cand_idx = cand_idx[0:t2_end-t2_min+1] + comb_fea_mat[cnt,4:,t2_min-t1_start:t2_end-t1_start+1,0] \ + = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + #if track_id1==34 and track_id2==39: + # import pdb; pdb.set_trace() + + # remove overlap detections + t_overlap = np.where(comb_fea_mat[cnt,0,:,1]+comb_fea_mat[cnt,0,:,2]>1) + if len(t_overlap)>0: + t_overlap = t_overlap[0] + comb_fea_mat[cnt,:,t_overlap,:] = 0 + + + if len(track_set)>0: + search_idx = np.where(np.logical_and(track_set[:,0]==track_id1, track_set[:,1]==track_id2)) + if len(search_idx[0])>0: + #save_fea_mat[search_idx[0][0],:,:,:] = comb_fea_mat[n,:,:,:] + if track_set[search_idx[0][0],2]==1: + comb_fea_label[cnt,2] = 1 + else: + comb_fea_label[cnt,3] = 1 + + + if len(remove_ids)>0: + comb_fea_mat = np.delete(comb_fea_mat, np.array(remove_ids), axis=0) + comb_fea_label = np.delete(comb_fea_label, np.array(remove_ids), axis=0) + + if len(comb_fea_mat)>0: + max_batch_size = 16 + num_batch = int(np.ceil(comb_fea_mat.shape[0]/max_batch_size)) + pred_y = np.zeros((comb_fea_mat.shape[0],2)) + for n in range(num_batch): + if n!=num_batch-1: + batch_size = max_batch_size + else: + batch_size = int(comb_fea_mat.shape[0]-(num_batch-1)*max_batch_size) + + batch_size = comb_fea_mat.shape[0] + x = np.zeros((batch_size,1,max_length,1)) + y = np.zeros((batch_size,1,max_length,1)) + w = np.zeros((batch_size,1,max_length,1)) + h = np.zeros((batch_size,1,max_length,1)) + ap = np.zeros((batch_size,feature_size-4,max_length,1)) + mask_1 = np.zeros((batch_size,1,max_length,2)) + mask_2 = np.zeros((batch_size,feature_size-4,max_length,2)) + x[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,0] + y[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,1,:,0] + w[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,2,:,0] + h[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,3,:,0] + ap[:,:,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,0] + mask_1[:,0,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,1:] + mask_2[:,:,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,1:] + pred_y[n*max_batch_size:n*max_batch_size+batch_size,:] = sess.run(y_conv, feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: np.zeros((batch_size,2)), + keep_prob: 1.0}) + + for n in range(len(pred_y)): + if np.sum(comb_fea_label[n,2:4])>0: + continue + if pred_y[n,0]>pred_y[n,1]: + comb_fea_label[n,2] = 1 + else: + comb_fea_label[n,3] = 1 + + if comb_fea_mat.shape[0]!=len(pred_y): + import pdb; pdb.set_trace() + + + 
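+        # Note (editor): pred_y holds the two logits per tracklet pair, with
+        # column 0 scoring the "link" hypothesis; the accumulated pair cost is
+        # pred_y[:,1] - pred_y[:,0], so a negative cost favors merging the
+        # pair. Each value is also cached in comb_track_cost (with
+        # comb_track_cost_mask marking computed entries) so later cluster
+        # evaluations can skip the network forward pass.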
all_fea_mat[fea_id:fea_id+len(pred_y),:,:,:] = comb_fea_mat + all_fea_label[fea_id:fea_id+len(pred_y),:] = comb_fea_label + + #if len(np.where(np.logical_and(comb_fea_label[:,0]==428,comb_fea_label[:,1]==435))[0])>0: + # import pdb; pdb.set_trace() + #print(comb_fea_label) + + cost = cost+np.sum(pred_y[:,1]-pred_y[:,0]) + #import pdb; pdb.set_trace() + + if pred_y.shape[0]!=len(temp_cost_list): + import pdb; pdb.set_trace() + for n in range(pred_y.shape[0]): + #import pdb; pdb.set_trace() + ''' + if tracklet_mat['comb_track_cost_mask'].shape[0]<=temp_cost_list[n][0] \ + or tracklet_mat['comb_track_cost_mask'].shape[1]<=temp_cost_list[n][1]: + import pdb; pdb.set_trace() + ''' + tracklet_mat['comb_track_cost_mask'][temp_cost_list[n][0],temp_cost_list[n][1]] = 1 + tracklet_mat['comb_track_cost'][temp_cost_list[n][0],temp_cost_list[n][1]] = pred_y[n,1]-pred_y[n,0] + + #comb_track_cost_list = comb_track_cost_list+temp_cost_list + #print(np.sum(tracklet_mat['comb_track_cost_mask'])) + return cost + +def get_split_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + + + tracklet_mat = track_struct['tracklet_mat'] + new_cluster_cost = np.zeros((2,1)) + if len(tracklet_mat['track_cluster'][track_id])<2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_interval = tracklet_mat['track_interval'].copy() + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append([track_id]) + remain_tracks = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + remain_tracks.remove(track_id) + new_cluster_set.append(remain_tracks) + + # get cost + if len(remain_tracks)>1: + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[1]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[1][sort_idx[n]] + track_id2 = new_cluster_set[1][sort_idx[n+1]] + #if track_id1==42: + # import pdb; pdb.set_trace() + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + #********************************* + new_cluster_cost[1,0] = comb_cost(remain_tracks, feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + # cross cost + comb_cluster = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + cross_cost = np.zeros((2,1)) + ''' + for n in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[n]] + track_id2 = comb_cluster[sort_idx[n+1]] + if (track_id1 in new_cluster_set[0] and track_id2 in new_cluster_set[1]) \ + or (track_id1 in new_cluster_set[1] and track_id2 in new_cluster_set[0]): + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + cross_cost[1,0] = 
cross_cost[1,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + cost = np.sum(new_cluster_cost)-cross_cost[1,0] + prev_cost = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]]-cross_cost[0,0] + diff_cost = cost-prev_cost + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_assign_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + #import pdb; pdb.set_trace() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + new_cluster_cost = np.zeros((2,1)) + new_cluster_set = [] + new_cluster_set.append(cluster1.copy()) + new_cluster_set[0].remove(track_id) + track_interval = tracklet_mat['track_interval'].copy() + # get cost + if len(new_cluster_set[0])>1: + + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[0]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[0][sort_idx[n]] + track_id2 = new_cluster_set[0][sort_idx[n+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, + y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + track_class = track_struct['tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + temp_new_cluster_cost = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + for mm in range(N_cluster): + n = track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n] + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster2.copy() + temp_cluster_set.append(track_id) + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + #if cluster2[m] in remove_set: + # remove_flag = 1 + # break + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + if neighbor_flag==0 or conflict_flag==1:# or remove_flag==1: + continue + + # get cost + temp_set = cluster2.copy() + temp_set.append(track_id) + temp_new_cluster_cost[n,0] = comb_cost(temp_set, feature_size, + max_length, + img_size, sess, batch_X_x, 
batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + #import pdb; pdb.set_trace() + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + ''' + # cross cost + comb_cluster = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[m]] + track_id2 = comb_cluster[sort_idx[m+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + if (track_id1 in cluster1 and track_id2 in cluster2) or (track_id1 in cluster2 and track_id2 in cluster1): + cross_cost_vec[n,0] = cross_cost_vec[n,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + if (track_id1 in new_cluster_set[0] and track_id2 in temp_set) or \ + (track_id1 in temp_set and track_id2 in new_cluster_set[0]): + cross_cost_vec[n,1] = cross_cost_vec[n,1]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + + + cost_vec = temp_new_cluster_cost[:,0]+new_cluster_cost[0,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost[1,0] = temp_new_cluster_cost[min_idx,0] + change_cluster_idx = [tracklet_mat['track_class'][track_id],min_idx] + temp_set = tracklet_mat['track_cluster'][min_idx].copy() + temp_set.append(track_id) + new_cluster_set.append(temp_set) + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_merge_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)==1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + new_cluster_cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original 
cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n].copy() + if len(cluster2)<=1: + continue + + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + + if neighbor_flag==0 or conflict_flag==1:# or remove_flag==1: + continue + + + # get cost + new_cluster_cost_vec[n,0] = comb_cost(cluster1+cluster2, feature_size, + max_length, img_size, sess, batch_X_x, batch_X_y, + batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + ''' + # cross cost + comb_cluster = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[m]] + track_id2 = comb_cluster[sort_idx[m+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + if (track_id1 in cluster1 and track_id2 in cluster2) or (track_id1 in cluster2 and track_id2 in cluster1): + cross_cost_vec[n,0] = cross_cost_vec[n,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = new_cluster_cost_vec[:,0]-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = new_cluster_cost_vec[min_idx,0] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = cost + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + temp_set = cluster1.copy() + temp_set = temp_set+tracklet_mat['track_cluster'][min_idx] + new_cluster_set.append(temp_set) + new_cluster_set.append([]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_switch_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + S1 = [] + S2 = [] + for k in range(len(cluster1)): + temp_id = cluster1[k] + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S1.append(temp_id) + else: + S2.append(temp_id) + if len(S1)==0 or len(S2)==0: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = 
[] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + new_cluster_cost_vec1 = float("inf")*np.ones((NN_cluster,1)) + new_cluster_cost_vec2 = float("inf")*np.ones((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + track_id_set = [] + for n in range(NN_cluster): + track_id_set.append([]) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # switch availability check + S3 = [] + S4 = [] + #remove_flag = 0 + cluster2 = tracklet_mat['track_cluster'][n].copy() + for k in range(len(cluster2)): + temp_id = cluster2[k] + #if temp_id in remove_set: + # remove_flag = 1 + # break + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S3.append(temp_id) + else: + #******************************************** + if tracklet_mat['track_interval'][temp_id,1] >=tracklet_mat['track_interval'][track_id,1] \ + and tracklet_mat['track_interval'][temp_id,0] <=tracklet_mat['track_interval'][track_id,1]: + if tracklet_mat['track_interval'][temp_id,1] -tracklet_mat['track_interval'][track_id,1] \ + >tracklet_mat['track_interval'][track_id,1]-tracklet_mat['track_interval'][temp_id,0]: + S4.append(temp_id) + else: + S3.append(temp_id) + else: + S4.append(temp_id) + + #if remove_flag==1: + # continue + + neighbor_flag1 = 1 + conflict_flag1 = 0 + if len(S3)==0: + neighbor_flag1 = 1 + conflict_flag1 = 0 + else: + temp_cluster_set = S3+S2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag1 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag1 = 1 + break + + + neighbor_flag2 = 1 + conflict_flag2 = 0 + if len(S4)==0: + neighbor_flag2 = 1 + conflict_flag2 = 0 + else: + temp_cluster_set = S4+S1 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag2 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag2 = 1 + break + + if neighbor_flag1==0 or conflict_flag1==1 or neighbor_flag2==0 or conflict_flag2==1: + continue + + + + # get cost + S_1 = S1+S4 + S_2 = S2+S3 + + #if (428 in S_1 and 435 in S_1) or (428 in S_2 and 435 in S_2): + # import pdb; pdb.set_trace() + + new_cluster_cost_vec1[n,0] = comb_cost(S_1, feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + new_cluster_cost_vec2[n,0] = comb_cost(S_2, feature_size, 
max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + cost_vec[n,0] = new_cluster_cost_vec1[n,0]+new_cluster_cost_vec2[n,0] + + track_id_set[n].append(S_1.copy()) + track_id_set[n].append(S_2.copy()) + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + ''' + # cross cost + comb_cluster = S_1+S_2 + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = comb_cluster[sort_idx[m]] + track_id2 = comb_cluster[sort_idx[m+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + continue + if (track_id1 in cluster1 and track_id2 in cluster2) or (track_id1 in cluster2 and track_id2 in cluster1): + cross_cost_vec[n,0] = cross_cost_vec[n,0]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + if (track_id1 in S_1 and track_id2 in S_2) or (track_id1 in S_2 and track_id2 in S_1): + cross_cost_vec[n,1] = cross_cost_vec[n,1]+comb_cost([track_id1,track_id2], feature_size, max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, + batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, keep_prob, y_conv) + ''' + + cost_vec = cost_vec[:,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = new_cluster_cost_vec1[min_idx,0] + new_cluster_cost[1,0] = new_cluster_cost_vec2[min_idx,0] + + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + new_cluster_set.append(track_id_set[min_idx][0]) + new_cluster_set.append(track_id_set[min_idx][1]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_break_cost(track_id, sess, img_size, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, y_conv): + #comb_track_cost_list = tracklet_mat['comb_track_cost'].copy() + #save_fea_mat = tracklet_mat['save_fea_mat'].copy() + ''' + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + ''' + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + + new_cluster_cost = np.zeros((2,1)) + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)<=2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + # get cost + after_ids = [] + for n in range(len(cluster1)): + if tracklet_mat['track_interval'][cluster1[n],1]>tracklet_mat['track_interval'][track_id,1]: + after_ids.append(cluster1[n]) + + if len(after_ids)==0: + cost = float("inf") + diff_cost = 
float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + before_ids = list(set(cluster1)-set(after_ids)) + if len(before_ids)<=1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append(before_ids) + remain_tracks = after_ids + new_cluster_set.append(remain_tracks) + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, + y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + new_cluster_cost[1,0] = comb_cost(new_cluster_set[1], feature_size, + max_length, + img_size, sess, batch_X_x, batch_X_y, batch_X_w, batch_X_h, + batch_X_a, batch_mask_1, batch_mask_2, batch_Y, keep_prob, + y_conv) + #tracklet_mat['comb_track_cost'] = comb_track_cost_list.copy() + #tracklet_mat['save_fea_mat'] = save_fea_mat.copy() + cost = np.sum(new_cluster_cost) + diff_cost = cost-tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def update_tracklet_mat(tracklet_mat): + final_tracklet_mat = tracklet_mat.copy() + track_interval = tracklet_mat['track_interval'] + num_cluster = len(tracklet_mat['track_cluster']) + final_tracklet_mat['track_id_mat'] = -1*np.ones((num_cluster,tracklet_mat['xmin_mat'].shape[1])) + + final_xmin_mat = -1*np.ones((num_cluster, final_tracklet_mat['xmin_mat'].shape[1])) + final_ymin_mat = -1*np.ones((num_cluster, final_tracklet_mat['ymin_mat'].shape[1])) + final_xmax_mat = -1*np.ones((num_cluster, final_tracklet_mat['xmax_mat'].shape[1])) + final_ymax_mat = -1*np.ones((num_cluster, final_tracklet_mat['ymax_mat'].shape[1])) + final_det_score_mat = -1*np.ones((num_cluster, final_tracklet_mat['det_score_mat'].shape[1])) + final_tracklet_mat['xmin_mat'] = final_xmin_mat.copy() + final_tracklet_mat['ymin_mat'] = final_ymin_mat.copy() + final_tracklet_mat['xmax_mat'] = final_xmax_mat.copy() + final_tracklet_mat['ymax_mat'] = final_ymax_mat.copy() + final_tracklet_mat['det_score_mat'] = final_det_score_mat.copy() + + for n in range(num_cluster): + for k in range(len(final_tracklet_mat['track_cluster'][n])): + temp_id = final_tracklet_mat['track_cluster'][n][k] + #import pdb; pdb.set_trace() + final_tracklet_mat['track_id_mat'][n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] = temp_id + final_xmin_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['xmin_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_ymin_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['ymin_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_xmax_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['xmax_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_ymax_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = 
tracklet_mat['ymax_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + final_det_score_mat[n,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] \ + = tracklet_mat['det_score_mat'][temp_id,int(track_interval[temp_id,0]):int(track_interval[temp_id,1])+1] + + det_xmin_mat = final_xmin_mat.copy() + det_ymin_mat = final_ymin_mat.copy() + det_xmax_mat = final_xmax_mat.copy() + det_ymax_mat = final_ymax_mat.copy() + det_det_score_mat = final_det_score_mat.copy() + + window_size = 2 + for n in range(num_cluster): + det_idx = np.where(final_xmin_mat[n,:]!=-1)[0] + t_min = np.min(det_idx) + t_max = np.max(det_idx) + miss_idx = np.where(final_xmin_mat[n,t_min:t_max+1]==-1)[0] + if len(miss_idx)==0: + continue + miss_idx = miss_idx+t_min + final_xmin_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_xmin_mat[n,det_idx]) + + final_ymin_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_ymin_mat[n,det_idx]) + + final_xmax_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_xmax_mat[n,det_idx]) + + final_ymax_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_ymax_mat[n,det_idx]) + + final_det_score_mat[n,miss_idx] = np.interp(miss_idx, det_idx, final_det_score_mat[n,det_idx]) + + + ''' + # merge two trajectories if they overlap + bbox_overlap_thresh = 0.7 + time_overlap_tresh = 5 + det_overlap_thresh = 0.1 + bbox_overlap_mat = np.zeros((num_cluster,num_cluster)) + for n in range(num_cluster-1): + for m in range(n+1,num_cluster): + cand_t = np.where(np.logical_and(final_xmin_mat[n,:]!=-1, final_xmin_mat[m,:]!=-1))[0] + if len(cand_t)det_overlap_thresh: + continue + + final_tracklet_mat['track_id_mat'][n,int(t2_min):int(t2_max)+1] = \ + final_tracklet_mat['track_id_mat'][m,int(t2_min):int(t2_max)+1] + final_xmin_mat[n,int(t2_min):int(t2_max)+1] = final_xmin_mat[m,int(t2_min):int(t2_max)+1] + final_ymin_mat[n,int(t2_min):int(t2_max)+1] = final_ymin_mat[m,int(t2_min):int(t2_max)+1] + final_xmax_mat[n,int(t2_min):int(t2_max)+1] = final_xmax_mat[m,int(t2_min):int(t2_max)+1] + final_ymax_mat[n,int(t2_min):int(t2_max)+1] = final_ymax_mat[m,int(t2_min):int(t2_max)+1] + final_det_score_mat[n,int(t2_min):int(t2_max)+1] = final_det_score_mat[m,int(t2_min):int(t2_max)+1] + + final_tracklet_mat['track_id_mat'][m,int(t2_min):int(t2_max)+1] = -1 + final_xmin_mat[m,:] = -1 + final_ymin_mat[m,:] = -1 + final_xmax_mat[m,:] = -1 + final_ymax_mat[m,:] = -1 + final_det_score_mat[m,:] = -1 + + ''' + final_tracklet_mat['xmin_mat'] = final_xmin_mat + final_tracklet_mat['ymin_mat'] = final_ymin_mat + final_tracklet_mat['xmax_mat'] = final_xmax_mat + final_tracklet_mat['ymax_mat'] = final_ymax_mat + final_tracklet_mat['det_score_mat'] = final_det_score_mat + + + + # moving average + for n in range(num_cluster): + cand_t = np.where(final_xmin_mat[n,:]!=-1)[0] + if len(cand_t)==0: + continue + t1 = int(np.min(cand_t)) + t2 = int(np.max(cand_t)) + for k in range(t1,t2+1): + t_start = max(k-window_size,t1) + t_end = min(k+window_size,t2) + final_tracklet_mat['xmin_mat'][n,k] = np.sum(final_xmin_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['ymin_mat'][n,k] = np.sum(final_ymin_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['xmax_mat'][n,k] = np.sum(final_xmax_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['ymax_mat'][n,k] = np.sum(final_ymax_mat[n,t_start:t_end+1])/(t_end-t_start+1) + final_tracklet_mat['det_score_mat'][n,k] = np.sum(final_det_score_mat[n,t_start:t_end+1])/(t_end-t_start+1) + + + return final_tracklet_mat + +def 
post_processing(): + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + track_params = track_struct['track_params'] + new_tracklet_mat = tracklet_mat.copy() + #import pdb; pdb.set_trace() + + # update track cluster + N_cluster = len(tracklet_mat["track_cluster"]) + remove_idx = [] + for n in range(N_cluster): + if len(tracklet_mat["track_cluster"][n])==0: + remove_idx.append(n) + continue + if tracklet_mat["track_cluster"][n][0] in remove_set: + remove_idx.append(n) + continue + + temp_track_intervals = tracklet_mat["track_interval"][np.array(tracklet_mat["track_cluster"][n]),:] + start_fr = np.min(temp_track_intervals[:,0]) + end_fr = np.max(temp_track_intervals[:,1]) + num_frs = end_fr-start_fr+1 + if num_frs600: + # import pdb; pdb.set_trace() + + # remove_set + if t_cluster_idx[0]==-1: + continue + + #if track_struct['tracklet_mat']['track_class'][track_id]<0: + # continue + + #if track_id in remove_set: + # continue + + diff_cost = np.zeros((5,1)) + new_C = [] # new cost + new_set = [] + change_idx = [] + + #cluster_cost = track_struct['tracklet_mat']['cluster_cost'] + #track_class = track_struct['tracklet_mat']['track_class'] + + # get split cost + #import pdb; pdb.set_trace() + diff_cost[0,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_split_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get assign cost + #import pdb; pdb.set_trace() + diff_cost[1,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_assign_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get merge cost + diff_cost[2,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_merge_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get switch cost + diff_cost[3,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_switch_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # get break cost + diff_cost[4,0],temp_new_C,temp_new_set,temp_change_idx \ + = get_break_cost(track_id, sess, img_size, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + #track_struct['tracklet_mat']['comb_track_cost'] = comb_track_cost_list.copy() + #track_struct['tracklet_mat']['save_fea_mat'] = 
save_fea_mat.copy() + new_C.append(temp_new_C) + new_set.append(temp_new_set) + change_idx.append(temp_change_idx) + + # update cluster + min_idx = np.argmin(diff_cost[:,0]) + min_cost = diff_cost[min_idx,0] + if min_cost>=0: + continue + + change_flag = 1 + #if track_id==251: + # import pdb; pdb.set_trace() + + #**************** + #import pdb; pdb.set_trace() + print(min_idx) + print(new_set) + new_t_idx = [] + if len(new_set[min_idx][0])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][0]),1)) + t_max_array = np.zeros((len(new_set[min_idx][0]),1)) + for m in range(len(new_set[min_idx][0])): + t_min_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][0][m],0] + t_max_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][0][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if len(new_set[min_idx][1])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][1]),1)) + t_max_array = np.zeros((len(new_set[min_idx][1]),1)) + for m in range(len(new_set[min_idx][1])): + t_min_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][1][m],0] + t_max_array[m,0] = track_struct['tracklet_mat']['track_interval'][new_set[min_idx][1][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if change_idx[min_idx][0]>=len(track_struct['tracklet_mat']['track_cluster']): + for m in range(len(track_struct['tracklet_mat']['track_cluster']),change_idx[min_idx][0]): + track_struct['tracklet_mat']['track_cluster'].append([]) + track_struct['tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['tracklet_mat']['track_cluster'].append(new_set[min_idx][0]) + track_struct['tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[0]) + else: + track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][0]] = new_set[min_idx][0] + track_struct['tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][0]] = new_t_idx[0] + + if change_idx[min_idx][1]>=len(track_struct['tracklet_mat']['track_cluster']): + for m in range(len(track_struct['tracklet_mat']['track_cluster']),change_idx[min_idx][1]): + track_struct['tracklet_mat']['track_cluster'].append([]) + track_struct['tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['tracklet_mat']['track_cluster'].append(new_set[min_idx][1]) + track_struct['tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[1]) + else: + track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][1]] = new_set[min_idx][1] + track_struct['tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][1]] = new_t_idx[1] + + for m in range(track_struct['track_params']['num_time_cluster']): + 
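+            # Note: time_cluster[m] holds the indices of the track clusters that
+            # can be active in temporal window m. This loop removes the two
+            # changed cluster ids from every window; the loop that follows
+            # re-inserts them for exactly the windows listed in new_t_idx.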
#import pdb; pdb.set_trace() + if change_idx[min_idx][0] in track_struct['tracklet_mat']['time_cluster'][m]: + track_struct['tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][0]) + if change_idx[min_idx][1] in track_struct['tracklet_mat']['time_cluster'][m]: + track_struct['tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][1]) + + for m in range(track_struct['track_params']['num_time_cluster']): + if m in new_t_idx[0]: + track_struct['tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][0]) + if m in new_t_idx[1]: + track_struct['tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][1]) + + if change_idx[min_idx][0]>=len(track_struct['tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['tracklet_mat']['cluster_cost']),change_idx[min_idx][0]): + track_struct['tracklet_mat']['cluster_cost'].append(0) + track_struct['tracklet_mat']['cluster_cost'].append(new_C[min_idx][0]) + else: + track_struct['tracklet_mat']['cluster_cost'][change_idx[min_idx][0]] = new_C[min_idx][0] + + if change_idx[min_idx][1]>=len(track_struct['tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['tracklet_mat']['cluster_cost']),change_idx[min_idx][1]): + track_struct['tracklet_mat']['cluster_cost'].append([]) + track_struct['tracklet_mat']['cluster_cost'].append(new_C[min_idx][1]) + else: + track_struct['tracklet_mat']['cluster_cost'][change_idx[min_idx][1]] = new_C[min_idx][1] + + for k in range(len(track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][0]])): + track_struct['tracklet_mat']['track_class'][track_struct['tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][0]][k]] = change_idx[min_idx][0] + + for k in range(len(track_struct['tracklet_mat']['track_cluster'][change_idx[min_idx][1]])): + track_struct['tracklet_mat']['track_class'][track_struct['tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][1]][k]] = change_idx[min_idx][1] + #import pdb; pdb.set_trace() + return change_flag + +def crop_det(tracklet_mat, crop_size, img_folder, crop_det_folder, flag): + + if not os.path.isdir(crop_det_folder): + os.makedirs(crop_det_folder) + + N_tracklet = tracklet_mat['xmin_mat'].shape[0] + T = tracklet_mat['xmin_mat'].shape[1] + img_list = os.listdir(img_folder) + cnt = 0 + #import pdb; pdb.set_trace() + for n in range(T): + fr_id = n + track_ids = np.where(tracklet_mat['xmax_mat'][:,n]!=-1) + if len(track_ids)==0: + continue + track_ids = track_ids[0] + img_name = file_name(fr_id,10)+'.png' + #import pdb; pdb.set_trace() + if img_name in img_list: + img_path = img_folder+'/'+img_name + img = misc.imread(img_path) + img_size = img.shape + else: + continue + + for m in range(len(track_ids)): + if flag==0: + xmin = int(max(0,tracklet_mat['xmin_mat'][track_ids[m],n])) + xmax = int(min(img.shape[1]-1,tracklet_mat['xmax_mat'][track_ids[m],n])) + ymin = int(max(0,tracklet_mat['ymin_mat'][track_ids[m],n])) + ymax = int(min(img.shape[0]-1,tracklet_mat['ymax_mat'][track_ids[m],n])) + img_patch = img[ymin:ymax,xmin:xmax,:] + img_patch = misc.imresize(img_patch, size=[crop_size,crop_size]) + class_name = file_name(track_ids[m]+1,4) + patch_name = class_name+'_'+file_name(fr_id,4)+'.png' + save_path = crop_det_folder+'/'+class_name + if not os.path.isdir(save_path): + os.makedirs(save_path) + save_path = save_path+'/'+patch_name + + #import pdb; pdb.set_trace() + misc.imsave(save_path, img_patch) + cnt = cnt+1 + return cnt, img_size + +def feature_encode(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, + 
batch_size_placeholder, control_placeholder, embeddings, labels, image_paths,
+                   batch_size, distance_metric):
+
+    # Run forward pass to calculate embeddings
+    #print('Running forward pass on LFW images')
+
+    use_flipped_images = False
+    use_fixed_image_standardization = False
+    use_random_rotate = False
+    use_random_crop = False
+    # Enqueue one epoch of image paths and labels
+    nrof_embeddings = len(image_paths)  # nrof_pairs * nrof_images_per_pair
+    nrof_flips = 2 if use_flipped_images else 1
+    nrof_images = nrof_embeddings * nrof_flips
+    labels_array = np.expand_dims(np.arange(0,nrof_images),1)
+    image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1)
+    control_array = np.zeros_like(labels_array, np.int32)
+
+    if use_fixed_image_standardization:
+        control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION
+    if use_flipped_images:
+        # Flip every second image
+        control_array += (labels_array % 2)*facenet.FLIP
+    if use_random_rotate:
+        control_array += facenet.RANDOM_ROTATE
+    if use_random_crop:
+        control_array += facenet.RANDOM_CROP
+
+    sess.run(enqueue_op, {image_paths_placeholder: image_paths_array,
+                          labels_placeholder: labels_array, control_placeholder: control_array})
+
+    embedding_size = int(embeddings.get_shape()[1])
+    assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size'
+    nrof_batches = nrof_images // batch_size
+    emb_array = np.zeros((nrof_images, embedding_size))
+    lab_array = np.zeros((nrof_images,))
+    for i in range(nrof_batches):
+        feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size}
+        emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict)
+        lab_array[lab] = lab
+        emb_array[lab, :] = emb
+        if i % 10 == 9:
+            print('.', end='')
+            sys.stdout.flush()
+    #import pdb; pdb.set_trace()
+    #np.savetxt("emb_array.csv", emb_array, delimiter=",")
+    return emb_array
+
+def feature_extract(feature_size, num_patch, max_length, patch_folder, triplet_model):
+    f_image_size = 160
+    distance_metric = 0
+    with tf.Graph().as_default():
+
+        with tf.Session() as sess:
+
+            image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths')
+            labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels')
+            batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
+            control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control')
+            phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
+
+            nrof_preprocess_threads = 4
+            image_size = (f_image_size, f_image_size)
+            eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000,
+                                                       dtypes=[tf.string, tf.int32, tf.int32],
+                                                       shapes=[(1,), (1,), (1,)],
+                                                       shared_name=None, name=None)
+            eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder,
+                                                             labels_placeholder, control_placeholder],
+                                                            name='eval_enqueue_op')
+            image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size,
+                                                                     nrof_preprocess_threads, batch_size_placeholder)
+
+            # Load the model
+            input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder}
+            facenet.load_model(triplet_model, input_map=input_map)
+
+            # Get output tensor
+            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
+            coord = tf.train.Coordinator()
+            tf.train.start_queue_runners(coord=coord, sess=sess)
+
+            fea_mat = np.zeros((num_patch,feature_size-4+2))
+            tracklet_list = os.listdir(patch_folder)
+            N_tracklet = len(tracklet_list)
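+            # Note: fea_mat has one row per cropped patch -- column 0 is the
+            # 1-based track id, column 1 the frame id parsed from the patch file
+            # name, and columns 2: the embedding (feature_size-4 dimensions).
+            # comb_cost relies on this layout when it looks up appearance
+            # features by track id.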
+ cnt = 0 + for n in range(N_tracklet): + tracklet_folder = patch_folder+'/'+tracklet_list[n] + patch_list = os.listdir(tracklet_folder) + + # get patch list, track_id and fr_id, starts from 1 + prev_cnt = cnt + for m in range(len(patch_list)): + # track_id + fea_mat[cnt,0] = n+1 + # fr_id + fea_mat[cnt,1] = int(patch_list[m][-8:-4]) + cnt = cnt+1 + patch_list[m] = tracklet_folder+'/'+patch_list[m] + + + #print(n) + lfw_batch_size = len(patch_list) + emb_array = feature_encode(sess, eval_enqueue_op, image_paths_placeholder, labels_placeholder, + phase_train_placeholder,batch_size_placeholder, control_placeholder, + embeddings, label_batch, patch_list, lfw_batch_size, distance_metric) + fea_mat[prev_cnt:prev_cnt+lfw_batch_size,2:] = np.copy(emb_array) + return fea_mat + +def color_table(num): + digit = '0123456789ABCDEF' + table = [] + for n in range(num): + select_idx = np.random.randint(16, size=6) + for k in range(6): + if k==0: + temp_color = digit[select_idx[k]] + else: + temp_color = temp_color+digit[select_idx[k]] + table.append(temp_color) + return table + +def draw_result(img_folder, save_folder): + #track_struct = pickle.load(open(track_struct_path,'rb')) + + global track_struct + tracklet_mat = track_struct['final_tracklet_mat'] + img_list = os.listdir(img_folder) + table = color_table(len(tracklet_mat['track_cluster'])) + #import pdb; pdb.set_trace() + for n in range(track_struct['final_tracklet_mat']['xmin_mat'].shape[1]): + fr_id = n + img_name = file_name(fr_id,10)+'.png' + if img_name not in img_list: + continue + img_path = img_folder+'/'+img_name + img = misc.imread(img_path) + + # Create figure and axes + fig,ax = plt.subplots(1) + + # Display the image + ax.imshow(img) + + # Create Rectangle patches + + + for k in range(tracklet_mat['xmin_mat'].shape[0]): + # + track_id = int(tracklet_mat['track_id_mat'][k,n]) + + ''' + if track_id==-1: + track_class = -1 + else: + track_class = int(tracklet_mat['track_class'][track_id,0]) + ''' + + if tracklet_mat['xmin_mat'][k,n]!=-1: + xmin = tracklet_mat['xmin_mat'][k,n] + ymin = tracklet_mat['ymin_mat'][k,n] + xmax = tracklet_mat['xmax_mat'][k,n] + ymax = tracklet_mat['ymax_mat'][k,n] + w = xmax-xmin + h = ymax-ymin + rect = patches.Rectangle((xmin,ymin),w,h,linewidth=1,edgecolor='#'+table[k], facecolor='none') + img_text = plt.text(xmin,ymin,str(k)+'_'+str(track_id),fontsize=6,color='#'+table[k]) + # Add the patch to the Axes + ax.add_patch(rect) + + if not os.path.isdir(save_folder): + os.makedirs(save_folder) + save_path = save_folder+'/'+img_name + plt.savefig(save_path,bbox_inches='tight',dpi=400) + + plt.clf() + plt.close('all') + #plt.show() + #import pdb; pdb.set_trace() + return + +def convert_frames_to_video(pathIn,pathOut,fps): + frame_array = [] + files = [f for f in os.listdir(pathIn) if os.path.isfile(os.path.join(pathIn, f))] + + #for sorting the file names properly + #files.sort(key = lambda x: int(x[5:-4])) + + for i in range(len(files)): + filename=pathIn + files[i] + #reading each files + img = cv2.imread(filename) + height, width, layers = img.shape + + if i==0: + size = (width,height) + img = cv2.resize(img,size) + #print(filename) + #inserting the frames into an image array + frame_array.append(img) + + out = cv2.VideoWriter(pathOut,cv2.VideoWriter_fourcc(*'DIVX'), fps, size) + + for i in range(len(frame_array)): + # writing to a image array + out.write(frame_array[i]) + out.release() + +def wrt_txt(tracklet_mat): + num_det = np.sum(tracklet_mat['xmin_mat']!=-1) + f = np.zeros((num_det, 9), dtype=int) + cnt = 0 
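+    # Note: rows follow the MOT txt convention frame,id,x,y,w,h,-1,-1,-1 with
+    # 1-based frame and track indices; width and height are recovered from the
+    # corner matrices in the loop below.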
+ for n in range(tracklet_mat['xmin_mat'].shape[1]): + for m in range(tracklet_mat['xmin_mat'].shape[0]): + if tracklet_mat['xmin_mat'][m,n]==-1: + continue + f[cnt,0] = n+1 + f[cnt,1] = m+1 + f[cnt,2] = tracklet_mat['xmin_mat'][m,n] + f[cnt,3] = tracklet_mat['ymin_mat'][m,n] + f[cnt,4] = tracklet_mat['xmax_mat'][m,n]-tracklet_mat['xmin_mat'][m,n]+1 + f[cnt,5] = tracklet_mat['ymax_mat'][m,n]-tracklet_mat['ymin_mat'][m,n]+1 + f[cnt,6] = -1 + f[cnt,7] = -1 + f[cnt,8] = -1 + cnt = cnt+1 + np.savetxt(txt_result_path, f, delimiter=',',fmt='%d') + +def time_cluster_check(): + + global track_struct + tracklet_mat = track_struct['tracklet_mat'] + N_cluster = len(tracklet_mat['track_cluster']) + err_flag = 0 + #import pdb; pdb.set_trace() + for n in range(N_cluster): + if len(tracklet_mat['track_cluster'][n])==0: + if tracklet_mat['track_cluster_t_idx'][n][0]!=-1: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + elif tracklet_mat['track_cluster'][n][0] in remove_set: + if tracklet_mat['track_cluster_t_idx'][n][0]!=-1: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + else: + t_min_array = np.zeros((len(tracklet_mat['track_cluster'][n]),1)) + t_max_array = np.zeros((len(tracklet_mat['track_cluster'][n]),1)) + for m in range(len(tracklet_mat['track_cluster'][n])): + track_id = tracklet_mat['track_cluster'][n][m] + t_min_array[m,0] = tracklet_mat['track_interval'][track_id,0] + t_max_array[m,0] = tracklet_mat['track_interval'][track_id,1] + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5,tracklet_mat['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + temp_t_idx = list(range(min_time_cluster_idx,max_time_cluster_idx+1)) + for m in range(len(temp_t_idx)): + if n not in tracklet_mat['time_cluster'][temp_t_idx[m]]: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + + for n in range(len(tracklet_mat['time_cluster'])): + for m in range(len(tracklet_mat['time_cluster'][n])): + cluster_id = tracklet_mat['time_cluster'][n][m] + + if len(tracklet_mat['track_cluster'][cluster_id])==0: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + elif tracklet_mat['track_cluster'][cluster_id][0] in remove_set: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + else: + t_min_array = np.zeros((len(tracklet_mat['track_cluster'][cluster_id]),1)) + t_max_array = np.zeros((len(tracklet_mat['track_cluster'][cluster_id]),1)) + for k in range(len(tracklet_mat['track_cluster'][cluster_id])): + track_id = tracklet_mat['track_cluster'][cluster_id][k] + t_min_array[k,0] = tracklet_mat['track_interval'][track_id,0] + t_max_array[k,0] = tracklet_mat['track_interval'][track_id,1] + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5,tracklet_mat['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + temp_t_idx = list(range(min_time_cluster_idx,max_time_cluster_idx+1)) + if n not in temp_t_idx: + err_flag = 1 + import pdb; pdb.set_trace() + return err_flag + return err_flag + +def TC_tracker(): + M = load_detection(det_path, 'KITTI_3d_2') + global 
track_struct + global all_fea_mat + global all_fea_label + track_struct = {'track_params':{}} + track_struct['track_params']['num_fr'] = int(M[-1,0]-M[0,0]+1) + track_struct['track_params']['IOU_thresh'] = 0.3 + track_struct['track_params']['color_thresh'] = 0.05 + track_struct['track_params']['det_thresh'] = -2 + track_struct['track_params']['linear_pred_thresh'] = 5 + track_struct['track_params']['t_dist_thresh'] = 10 + track_struct['track_params']['track_overlap_thresh'] = 0.1 + track_struct['track_params']['search_radius'] = 1.5 + track_struct['track_params']['const_fr_thresh'] = 5 + track_struct['track_params']['crop_size'] = 182 + track_struct['track_params']['loc_scales'] = [100,30,5,5] + track_struct['track_params']['time_cluster_dist'] = 100 + track_struct['track_params']['num_time_cluster'] = int(np.ceil(track_struct['track_params']['num_fr'] + /track_struct['track_params']['time_cluster_dist'])) + track_struct['track_obj'] = {'track_id':[], 'bbox':[], 'bbox_3d':[] ,'det_score':[], 'mean_color':[]} + track_struct['tracklet_mat'] = {'xmin_mat':[], 'ymin_mat':[], 'xmax_mat':[], 'ymax_mat':[], 'x_3d_mat':[], 'y_3d_mat':[], + 'w_3d_mat':[], 'h_3d_mat':[], 'det_score_mat':[]} + + img_list = os.listdir(img_folder) + #track_struct['track_params']['num_fr'] = len(img_list) + for n in range(track_struct['track_params']['num_fr']): + + + # fr idx starts from 1 + fr_idx = n+1 + idx = np.where(np.logical_and(M[:,0]==fr_idx,M[:,5]>track_struct['track_params']['det_thresh']))[0] + if len(idx)>1: + choose_idx, _ = merge_bbox(M[idx,1:5], 0.3, M[idx,5]) + #import pdb; pdb.set_trace() + temp_M = M[idx[choose_idx],:] + else: + temp_M = M[idx,:] + num_bbox = len(temp_M) + + img_name = file_name(fr_idx-1,10)+'.png' + if img_name in img_list: + img_path = img_folder+'/'+img_name + img = misc.imread(img_path) + else: + num_bbox = 0 + + track_struct['track_obj']['track_id'].append([]) + if num_bbox==0: + track_struct['track_obj']['bbox'].append([]) + track_struct['track_obj']['bbox_3d'].append([]) + track_struct['track_obj']['det_score'].append([]) + track_struct['track_obj']['mean_color'].append([]) + else: + track_struct['track_obj']['bbox'].append(temp_M[:,1:5]) + track_struct['track_obj']['bbox_3d'].append(temp_M[:,6:10]) + track_struct['track_obj']['det_score'].append(temp_M[:,5]) + temp_mean_color = np.zeros((num_bbox,3)) + for k in range(num_bbox): + xmin = int(max(0,temp_M[k,1])) + ymin = int(max(0,temp_M[k,2])) + xmax = int(min(img.shape[1]-1,temp_M[k,1]+temp_M[k,3])) + ymax = int(min(img.shape[0]-1,temp_M[k,2]+temp_M[k,4])) + temp_mean_color[k,0] = np.mean(img[ymin:ymax+1,xmin:xmax+1,0]) + temp_mean_color[k,1] = np.mean(img[ymin:ymax+1,xmin:xmax+1,1]) + temp_mean_color[k,2] = np.mean(img[ymin:ymax+1,xmin:xmax+1,2]) + temp_mean_color = temp_mean_color/255.0 + #import pdb; pdb.set_trace() + track_struct['track_obj']['mean_color'].append(temp_mean_color.copy()) + #import pdb; pdb.set_trace() + + #import pdb; pdb.set_trace() + # forward tracking + init_num = 2000 + track_struct['tracklet_mat']['xmin_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymin_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['xmax_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymax_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['det_score_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + 
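+    # Note: the tracklet matrices are preallocated for up to init_num tracklets
+    # and a value of -1 means "no detection in this frame"; rows that remain
+    # all -1 after forward tracking are dropped with np.delete further down.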
track_struct['tracklet_mat']['x_3d_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['y_3d_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['w_3d_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['h_3d_mat'] = -1*np.ones((init_num,track_struct['track_params']['num_fr'])) + + max_id = 0 + for n in range(track_struct['track_params']['num_fr']-1): + print(n) + #print(max_id) + track_struct['tracklet_mat'], track_struct['track_obj']['track_id'][n], track_struct['track_obj']['track_id'][n+1], max_id \ + = forward_tracking(track_struct['track_obj']['track_id'][n], track_struct['track_obj']['track_id'][n+1], + track_struct['track_obj']['bbox'][n], track_struct['track_obj']['bbox'][n+1], + track_struct['track_obj']['bbox_3d'][n], track_struct['track_obj']['bbox_3d'][n+1], + track_struct['track_obj']['det_score'][n], track_struct['track_obj']['det_score'][n+1], + track_struct['track_obj']['mean_color'][n], track_struct['track_obj']['mean_color'][n+1], + n+2, track_struct['track_params'], track_struct['tracklet_mat'], max_id) + mask = track_struct['tracklet_mat']['xmin_mat']==-1 + mask = np.sum(mask,axis=1) + neg_idx = np.where(mask==track_struct['track_params']['num_fr'])[0] + track_struct['tracklet_mat']['xmin_mat'] = np.delete(track_struct['tracklet_mat']['xmin_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['ymin_mat'] = np.delete(track_struct['tracklet_mat']['ymin_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['xmax_mat'] = np.delete(track_struct['tracklet_mat']['xmax_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['ymax_mat'] = np.delete(track_struct['tracklet_mat']['ymax_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['det_score_mat'] = np.delete(track_struct['tracklet_mat']['det_score_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['x_3d_mat'] = np.delete(track_struct['tracklet_mat']['x_3d_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['y_3d_mat'] = np.delete(track_struct['tracklet_mat']['y_3d_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['w_3d_mat'] = np.delete(track_struct['tracklet_mat']['w_3d_mat'], neg_idx, axis=0) + track_struct['tracklet_mat']['h_3d_mat'] = np.delete(track_struct['tracklet_mat']['h_3d_mat'], neg_idx, axis=0) + #import pdb; pdb.set_trace() + + # tracklet clustering + iters = 20 + track_struct['tracklet_mat'] = preprocessing(track_struct['tracklet_mat'], 3) + + ''' + # remove large bbox + #import pdb; pdb.set_trace() + for n in range(len(track_struct['tracklet_mat']['xmin_mat'])): + cand_t = np.where(track_struct['tracklet_mat']['xmin_mat'][n,:]!=-1)[0] + temp_h = track_struct['tracklet_mat']['ymax_mat'][n,cand_t]-track_struct['tracklet_mat']['ymin_mat'][n,cand_t] + max_h = np.max(temp_h) + if max_h>400: + remove_set.append(n) + ''' + + #import pdb; pdb.set_trace() + + num_patch, img_size = crop_det(track_struct['tracklet_mat'], track_struct['track_params']['crop_size'], + img_folder, crop_det_folder, 0) + track_struct['tracklet_mat']['appearance_fea_mat'] = feature_extract(feature_size, num_patch, max_length, + crop_det_folder, triplet_model) + #import pdb; pdb.set_trace() + #******************* + track_struct['tracklet_mat']['appearance_fea_mat'][:,2:] = 10*track_struct['tracklet_mat']['appearance_fea_mat'][:,2:] + track_struct['track_params']['img_size'] = img_size + track_struct['tracklet_mat']['comb_track_cost'] = 
np.zeros((len(track_struct['tracklet_mat']['xmin_mat']), + len(track_struct['tracklet_mat']['xmin_mat']))) + track_struct['tracklet_mat']['comb_track_cost_mask'] = np.zeros((len(track_struct['tracklet_mat']['xmin_mat']), + len(track_struct['tracklet_mat']['xmin_mat']))) + + #import pdb; pdb.set_trace() + + # load nn + batch_X_x = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_y = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_w = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_h = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_a = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 1]) + batch_mask_1 = tf.placeholder(tf.float32, [None, 1, max_length, 2]) + batch_mask_2 = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 2]) + batch_Y = tf.placeholder(tf.int32, [None, num_classes]) + keep_prob = tf.placeholder(tf.float32) + + y_conv = seq_nn_3d.seq_nn(batch_X_x,batch_X_y,batch_X_w,batch_X_h,batch_X_a,batch_mask_1, + batch_mask_2,batch_Y,max_length,feature_size,keep_prob) + + cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=batch_Y, logits=y_conv)) + train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) + correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(batch_Y, 1)) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + + init = tf.global_variables_initializer() + saver = tf.train.Saver() + + with tf.Session() as sess: + + saver.restore(sess, seq_model) + print("Model restored.") + + for n in range(iters): + print("iteration") + print(n) + change_flag = tracklet_clustering(sess, + batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, + batch_mask_2, batch_Y, keep_prob, y_conv) + if change_flag==0: + #import pdb; pdb.set_trace() + time_check_flag = time_cluster_check() + break + + + #pickle.dump(save_fea_mat, open(save_fea_path, 'wb')) + #pickle.dump(track_set, open(save_label_path,'wb')) + #pickle.dump(remove_set, open(save_remove_path,'wb')) + + ''' + print(np.sum(track_struct['tracklet_mat']['comb_track_cost_mask'])) + global all_fea_mat + global all_fea_label + remove_idx = [] + for n in range(len(all_fea_mat)): + if np.sum(all_fea_mat[n,0,:,1])==0: + remove_idx.append(n) + + all_fea_mat = np.delete(all_fea_mat, np.array(remove_idx), axis=0) + all_fea_label = np.delete(all_fea_label, np.array(remove_idx), axis=0) + + print(len(all_fea_mat)) + #import pdb; pdb.set_trace() + pickle.dump(all_fea_mat, open(save_all_fea_path,'wb')) + pickle.dump(all_fea_label, open(save_all_label_path,'wb')) + + + + save_batch_size = 5000 + save_batch_num = int(np.ceil(len(all_fea_mat)/save_batch_size)) + for k in range(save_batch_num): + if k!=save_batch_num-1: + temp_fea = all_fea_mat[k*save_batch_size:(k+1)*save_batch_size,:,:,:] + temp_label = all_fea_label[k*save_batch_size:(k+1)*save_batch_size,:] + else: + temp_fea = all_fea_mat[k*save_batch_size:,:,:,:] + temp_label = all_fea_label[k*save_batch_size:,:] + temp_fea_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+'_all'+str(k)+'.obj' + temp_label_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+'_all_label'+str(k)+'.obj' + pickle.dump(temp_fea, open(temp_fea_path,'wb')) + pickle.dump(temp_label, open(temp_label_path,'wb')) + ''' + + post_processing() + + pickle.dump(track_struct, open(track_struct_path,'wb')) + + wrt_txt(track_struct['final_tracklet_mat']) + + draw_result(img_folder, tracking_img_folder) + + convert_frames_to_video(tracking_img_folder+'/', tracking_video_path, 20) + + return 
track_struct + +def refine_track_set(): + + track_struct = pickle.load(open(track_struct_path,'rb')) + track_set = pickle.load(open(save_label_path,'rb')) + + track_interval = track_struct['tracklet_mat']['track_interval'] + new_track_cluster = [] + for n in range(len(track_struct['final_tracklet_mat']['track_cluster'])): + new_track_cluster.append(track_struct['final_tracklet_mat']['track_cluster'][n].copy()) + new_track_class = track_struct['final_tracklet_mat']['track_class'].copy() + #import pdb; pdb.set_trace() + + # split track + for n in range(len(track_set)): + + if track_set[n,2]==1: + continue + track_class1 = new_track_class[track_set[n,0]][0] + track_class2 = new_track_class[track_set[n,1]][0] + if track_class1!=track_class2: + continue + temp_track_cluster = new_track_cluster[track_class1].copy() + sort_idx = np.argsort(track_interval[np.array(temp_track_cluster),1]) + before_track_ids = [] + for k in range(len(sort_idx)): + before_track_ids.append(temp_track_cluster[sort_idx[k]]) + new_track_class[temp_track_cluster[sort_idx[k]]][0] = len(new_track_cluster) + + new_track_cluster[track_class1].remove(temp_track_cluster[sort_idx[k]]) + + if temp_track_cluster[sort_idx[k]]==track_set[n,0]: + break + new_track_cluster.append(before_track_ids) + + #import pdb; pdb.set_trace() + + # merge track + for n in range(len(track_set)): + if track_set[n,2]==0: + continue + track_class1 = new_track_class[track_set[n,0]][0] + track_class2 = new_track_class[track_set[n,1]][0] + + if track_class1==track_class2: + continue + if track_set[n,0] not in track_struct['tracklet_mat']['neighbor_track_idx'][track_set[n,1]]: + continue + + for k in range(len(new_track_cluster[track_class2])): + new_track_class[new_track_cluster[track_class2][k]] = track_class1 + new_track_cluster[track_class1] = new_track_cluster[track_class1].copy()+new_track_cluster[track_class2].copy() + new_track_cluster[track_class2] = [] + + #if track_set[n,0]==271 and track_set[n,1]==290: + # import pdb; pdb.set_trace() + + remove_idx = [] + for n in range(len(new_track_cluster)): + if len(new_track_cluster[n])==0: + remove_idx.append(n) + + new_track_cluster = list(np.delete(new_track_cluster, remove_idx)) + + #import pdb; pdb.set_trace() + + # update track class + N_tracklet = track_struct['tracklet_mat']['xmin_mat'].shape[0] + new_track_class = -1*np.ones((N_tracklet,1),dtype=int) + for n in range(len(new_track_cluster)): + for k in range(len(new_track_cluster[n])): + track_id = new_track_cluster[n][k] + new_track_class[track_id,0] = n + + #import pdb; pdb.set_trace() + track_struct['gt_tracklet_mat'] = {'track_cluster':[], 'track_class':[]} + track_struct['gt_tracklet_mat']['track_cluster'] = new_track_cluster.copy() + track_struct['gt_tracklet_mat']['track_class'] = new_track_class.copy() + + #import pdb; pdb.set_trace() + + # update label + all_fea_label = pickle.load(open(save_all_label_path,'rb')) + ''' + all_fea_label2 = pickle.load(open(save_all_label_path2,'rb')) + all_fea_label3 = pickle.load(open(save_all_label_path3,'rb')) + all_fea_label4 = pickle.load(open(save_all_label_path4,'rb')) + all_fea_label = np.concatenate((all_fea_label1, all_fea_label2), axis=0) + all_fea_label = np.concatenate((all_fea_label, all_fea_label3), axis=0) + all_fea_label = np.concatenate((all_fea_label, all_fea_label4), axis=0) + ''' + + for n in range(len(all_fea_label)): + track_class1 = track_struct['gt_tracklet_mat']['track_class'][int(all_fea_label[n,0])] + track_class2 = 
track_struct['gt_tracklet_mat']['track_class'][int(all_fea_label[n,1])] + if track_class1==track_class2: + all_fea_label[n,2] = 1 + all_fea_label[n,3] = 0 + else: + all_fea_label[n,2] = 0 + all_fea_label[n,3] = 1 + + pickle.dump(all_fea_label, open(save_all_label_path,'wb')) + ''' + save_batch_size = 5000 + save_batch_num = int(np.ceil(len(all_fea_label)/save_batch_size)) + #import pdb; pdb.set_trace() + for k in range(save_batch_num): + if k!=save_batch_num-1: + temp_label = all_fea_label[k*save_batch_size:(k+1)*save_batch_size,:] + else: + temp_label = all_fea_label[k*save_batch_size:,:] + temp_label_path = 'D:/Data/MOT/save_fea_mat/'+seq_name+'_all_label'+str(k)+'.obj' + pickle.dump(temp_label, open(temp_label_path,'wb')) + ''' + pickle.dump(track_struct,open(track_struct_path,'wb')) + +def refine_track(): + det_thresh = 0 + overlap_thresh = 0.5 + linear_len_thresh = 5 + img_size = track_struct['track_params']['img_size'] + + global track_struct + track_struct = pickle.load(open(track_struct_path,'rb')) + M = load_detection(det_path, 'MOT') + num_det = M.shape[0] + cand_mask = np.ones((num_det,1),dtype=int) + + # remove detection with low score + cand_mask[M[:,5]overlap_thresh: + cand_mask[cand_idx[n]] = 0 + + # get mean color of detection + cand_idx = np.where(cand_mask[:,0]==1)[0] + mean_color_mat = np.zeros((num_det,3)) + for n in range(len(cand_idx)): + xmin = int(max(0,M[cand_idx[n],1])) + ymin = int(max(0,M[cand_idx[n],2])) + xmax = int(min(img_size[1]-1,M[cand_idx[n],1]+M[cand_idx[n],3])) + ymax = int(min(img_size[0]-1,M[cand_idx[n],2]+M[cand_idx[n],4])) + mean_color_mat[cand_idx[n],0] = np.mean(img[ymin:ymax+1,xmin:xmax+1,0]) + mean_color_mat[cand_idx[n],1] = np.mean(img[ymin:ymax+1,xmin:xmax+1,1]) + mean_color_mat[cand_idx[n],2] = np.mean(img[ymin:ymax+1,xmin:xmax+1,2]) + + # assign detection to track + tracklet_mat = track_struct['final_tracklet_mat'].copy() + num_track = len(tracklet_mat['xmin_mat']) + det_to_track_overlap = np.zeros((len(cand_idx),num_track)) + det_to_track_mask = np.zeros((len(cand_idx),num_track)) + det_to_track_dist = np.zeros((len(cand_idx),num_track)) + for n in range(len(cand_idx)): + fr_idx = M[cand_idx[n],0] + for m in range(len(tracklet_mat['xmin_mat'])): + non_neg_idx = np.where(tracklet_mat['xmin_mat'][m,:]!=-1)[0] + t_min = np.min(non_neg_idx) + t_max = np.max(non_neg_idx) + if fr_idx-1>=t_min and fr_idx-1<=t_max: + continue + det_to_track_dist[n,m] = min(abs(fr_idx-1-t_min),abs(fr_idx-1-t_max)) + det_to_track_mask[n,m] = 1 + track_bbox = np.zeros((1,4)) + if abs(fr_idx-1-t_min)0: + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = obj_ids[save_idx[0]] + continue + + # check assigned_obj_id_mask + obj_mask = track_struct['tracklet_mat']['assigned_obj_id_mask'][obj_ids] + assigned_idx = np.where(obj_mask==1)[0] + if len(assigned_idx)==0: + new_cnt = new_cnt+1 + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = avai_ids[new_cnt] + else: + check_flag = 0 + for k in range(len(assigned_idx)): + temp_obj_id = obj_ids[assigned_idx[k]] + if new_assigned_id_mask[temp_obj_id]==1: + continue + else: + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] \ + = temp_obj_id + check_flag = 1 + new_assigned_id_mask[temp_obj_id] = 1 + break + if check_flag==0: + new_cnt = new_cnt+1 + track_struct['sub_tracklet_mat']['obj_id_mat'][np.array(tracklet_mat["track_cluster"][n],dtype=int)] = avai_ids[new_cnt] + + # 
copy to tracklet_mat + #import pdb; pdb.set_trace() + cand_track_idx = np.where(track_struct['tracklet_mat']['track_id_mat']!=-1)[0] + track_struct['tracklet_mat']['obj_id_mat'][cand_track_idx] = track_struct['sub_tracklet_mat']['obj_id_mat'].copy() + + return + +def comb_cost(tracklet_set, sess): + + global track_struct + #global all_fea_mat + #global all_fea_label + + img_size = track_struct['track_params']['img_size'] + feature_size = track_struct['track_params']['feature_size'] + max_length = track_struct['track_params']['max_length'] + + tracklet_mat = track_struct['sub_tracklet_mat'] + loc_scales = track_struct['track_params']['loc_scales'] + + ''' + temp_sum = np.sum(all_fea_mat[:,4,:,1], axis=1) + if len(np.where(temp_sum!=0)[0])==0: + fea_id = 0 + else: + fea_id = int(np.max(np.where(temp_sum!=0)[0]))+1 + ''' + + # cnn classifier + N_tracklet = len(tracklet_set) + track_interval = tracklet_mat['track_interval'] + sort_idx = np.argsort(track_interval[np.array(tracklet_set),1]) + cost = 0 + if len(sort_idx)<=1: + return cost + + remove_ids = [] + + comb_fea_mat = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),feature_size,max_length,3)) + comb_fea_label = np.zeros((int(len(sort_idx)*(len(sort_idx)-1)/2),4)) + + temp_cost_list = [] + #print(len(comb_track_cost)) + cnt = -1 + for n in range(0, len(sort_idx)-1): + for kk in range(n+1,len(sort_idx)): + cnt = int(cnt+1) + track_id1 = tracklet_set[sort_idx[n]] + track_id2 = tracklet_set[sort_idx[kk]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + remove_ids.append(cnt) + continue + + #import pdb; pdb.set_trace() + if tracklet_mat['comb_track_cost_mask'][track_id1,track_id2]==1: + cost = cost+tracklet_mat['comb_track_cost'][track_id1,track_id2] + remove_ids.append(cnt) + continue + + comb_fea_label[cnt,0] = track_id1 + comb_fea_label[cnt,1] = track_id2 + + temp_cost_list.append([track_id1,track_id2]) + + + # t starts from 0 + #import pdb; pdb.set_trace() + t1_min = int(track_interval[track_id1,0]) + t1_max = int(track_interval[track_id1,1]) + t2_min = int(track_interval[track_id2,0]) + t2_max = int(track_interval[track_id2,1]) + t_min = int(min(t1_min,t2_min)) + t_max = int(max(t1_max,t2_max)) + + if t_max-t_min+1<=max_length: + comb_fea_mat[cnt,:,t1_min-t_min:t1_max-t_min+1,1] = 1 + comb_fea_mat[cnt,0,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['x_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['y_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['w_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,t1_min-t_min:t1_max-t_min+1,0] = tracklet_mat['h_3d_mat'][track_id1,t1_min:t1_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1)[0] + if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + + if comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,4:,t1_min-t_min:t1_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + comb_fea_mat[cnt,:,t2_min-t_min:t2_max-t_min+1,2] = 1 + + comb_fea_mat[cnt,0,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['x_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,t2_min-t_min:t2_max-t_min+1,0] = 
tracklet_mat['y_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['w_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,t2_min-t_min:t2_max-t_min+1,0] = tracklet_mat['h_3d_mat'][track_id2,t2_min:t2_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2)[0] + if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + + if comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0].shape[1]!=np.transpose(tracklet_mat['appearance_fea_mat'] \ + [cand_idx,2:]).shape[1]: + import pdb; pdb.set_trace() + + comb_fea_mat[cnt,4:,t2_min-t_min:t2_max-t_min+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + else: + t_len1 = t1_max-t1_min+1 + t_len2 = t2_max-t2_min+1 + t_len_min = min(t_len1,t_len2) + mid_t = int(0.5*(t1_max+t2_min)) + if mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1<=0.5*max_length: + t2_end = t2_max + t1_start = t2_end-max_length+1 + #t1_start = mid_t-int(0.5*max_length)+1 + #t2_end = t1_start+max_length-1 + elif mid_t-t1_min+1<=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = t1_min + t2_end = t1_start+max_length-1 + else: # mid_t-t1_min+1>=0.5*max_length and t2_max-mid_t+1>=0.5*max_length: + t1_start = mid_t-int(0.5*max_length)+1 + t2_end = t1_start+max_length-1 + + comb_fea_mat[cnt,:,0:t1_max-t1_start+1,1] = 1 + if comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmax_mat'][track_id1,t1_start:t1_max+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,0:t1_max-t1_start+1,0] = tracklet_mat['x_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[0] + + comb_fea_mat[cnt,1,0:t1_max-t1_start+1,0] = tracklet_mat['y_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[1] + + comb_fea_mat[cnt,2,0:t1_max-t1_start+1,0] = tracklet_mat['w_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[2] + + comb_fea_mat[cnt,3,0:t1_max-t1_start+1,0] = tracklet_mat['h_3d_mat'][track_id1,t1_start:t1_max+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id1)[0] + if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + + cand_idx = cand_idx[t1_start-t1_min:] + comb_fea_mat[cnt,4:,0:t1_max-t1_start+1,0] = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + comb_fea_mat[cnt,:,t2_min-t1_start:t2_end-t1_start+1,2] = 1 + if comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0].shape[0] \ + !=tracklet_mat['xmin_mat'][track_id2,t2_min:t2_end+1].shape[0]: + import pdb; pdb.set_trace() + comb_fea_mat[cnt,0,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['x_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[0] + comb_fea_mat[cnt,1,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['y_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[1] + comb_fea_mat[cnt,2,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['w_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[2] + comb_fea_mat[cnt,3,t2_min-t1_start:t2_end-t1_start+1,0] = \ + tracklet_mat['h_3d_mat'][track_id2,t2_min:t2_end+1]/loc_scales[3] + + cand_idx = np.where(tracklet_mat['appearance_fea_mat'][:,0]==track_id2)[0] + if len(cand_idx)>0: + temp_frs = tracklet_mat['appearance_fea_mat'][cand_idx,1] + temp_sort_idx = np.argsort(temp_frs) + cand_idx = cand_idx[temp_sort_idx] + #import pdb; pdb.set_trace() + cand_idx = 
cand_idx[0:t2_end-t2_min+1] + comb_fea_mat[cnt,4:,t2_min-t1_start:t2_end-t1_start+1,0] \ + = np.transpose(tracklet_mat['appearance_fea_mat'][cand_idx,2:]) + + # remove overlap detections + t_overlap = np.where(comb_fea_mat[cnt,0,:,1]+comb_fea_mat[cnt,0,:,2]>1) + if len(t_overlap)>0: + t_overlap = t_overlap[0] + comb_fea_mat[cnt,:,t_overlap,:] = 0 + + + if len(track_set)>0: + search_idx = np.where(np.logical_and(track_set[:,0]==track_id1, track_set[:,1]==track_id2)) + if len(search_idx[0])>0: + #save_fea_mat[search_idx[0][0],:,:,:] = comb_fea_mat[n,:,:,:] + if track_set[search_idx[0][0],2]==1: + comb_fea_label[cnt,2] = 1 + else: + comb_fea_label[cnt,3] = 1 + + + if len(remove_ids)>0: + comb_fea_mat = np.delete(comb_fea_mat, np.array(remove_ids), axis=0) + comb_fea_label = np.delete(comb_fea_label, np.array(remove_ids), axis=0) + + if len(comb_fea_mat)>0: + max_batch_size = 16 + num_batch = int(np.ceil(comb_fea_mat.shape[0]/max_batch_size)) + pred_y = np.zeros((comb_fea_mat.shape[0],2)) + for n in range(num_batch): + if n!=num_batch-1: + batch_size = max_batch_size + else: + batch_size = int(comb_fea_mat.shape[0]-(num_batch-1)*max_batch_size) + + #batch_size = comb_fea_mat.shape[0] + x = np.zeros((batch_size,1,max_length,1)) + y = np.zeros((batch_size,1,max_length,1)) + w = np.zeros((batch_size,1,max_length,1)) + h = np.zeros((batch_size,1,max_length,1)) + ap = np.zeros((batch_size,feature_size-4,max_length,1)) + mask_1 = np.zeros((batch_size,1,max_length,2)) + mask_2 = np.zeros((batch_size,feature_size-4,max_length,2)) + x[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,0] + y[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,1,:,0] + w[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,2,:,0] + h[:,0,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,3,:,0] + ap[:,:,:,0] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,0] + mask_1[:,0,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,0,:,1:] + mask_2[:,:,:,:] = comb_fea_mat[n*max_batch_size:n*max_batch_size+batch_size,4:,:,1:] + pred_y[n*max_batch_size:n*max_batch_size+batch_size,:] = sess.run(y_conv, feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: np.zeros((batch_size,2)), + keep_prob: 1.0}) + + for n in range(len(pred_y)): + if np.sum(comb_fea_label[n,2:4])>0: + continue + if pred_y[n,0]>pred_y[n,1]: + comb_fea_label[n,2] = 1 + else: + comb_fea_label[n,3] = 1 + + if comb_fea_mat.shape[0]!=len(pred_y): + import pdb; pdb.set_trace() + + ''' + all_fea_mat[fea_id:fea_id+len(pred_y),:,:,:] = comb_fea_mat + all_fea_label[fea_id:fea_id+len(pred_y),:] = comb_fea_label + ''' + + + cost = cost+np.sum(pred_y[:,1]-pred_y[:,0]) + #import pdb; pdb.set_trace() + + if pred_y.shape[0]!=len(temp_cost_list): + import pdb; pdb.set_trace() + for n in range(pred_y.shape[0]): + + tracklet_mat['comb_track_cost_mask'][temp_cost_list[n][0],temp_cost_list[n][1]] = 1 + tracklet_mat['comb_track_cost'][temp_cost_list[n][0],temp_cost_list[n][1]] = pred_y[n,1]-pred_y[n,0] + + return cost + +def get_split_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + new_cluster_cost = np.zeros((2,1)) + if len(tracklet_mat['track_cluster'][track_id])<2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return 
diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_interval = tracklet_mat['track_interval'].copy() + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append([track_id]) + remain_tracks = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + remain_tracks.remove(track_id) + new_cluster_set.append(remain_tracks) + + # get cost + if len(remain_tracks)>1: + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[1]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[1][sort_idx[n]] + track_id2 = new_cluster_set[1][sort_idx[n+1]] + #if track_id1==42: + # import pdb; pdb.set_trace() + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + #********************************* + new_cluster_cost[1,0] = comb_cost(remain_tracks, sess) + + # cross cost + comb_cluster = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + sort_idx = np.argsort(track_interval[np.array(comb_cluster),1]) + cross_cost = np.zeros((2,1)) + + cost = np.sum(new_cluster_cost)-cross_cost[1,0] + prev_cost = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]]-cross_cost[0,0] + diff_cost = cost-prev_cost + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_assign_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + + #import pdb; pdb.set_trace() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + new_cluster_cost = np.zeros((2,1)) + new_cluster_set = [] + new_cluster_set.append(cluster1.copy()) + new_cluster_set[0].remove(track_id) + track_interval = tracklet_mat['track_interval'].copy() + # get cost + if len(new_cluster_set[0])>1: + + sort_idx = np.argsort(track_interval[np.array(new_cluster_set[0]),1]) + for n in range(0, len(sort_idx)-1): + track_id1 = new_cluster_set[0][sort_idx[n]] + track_id2 = new_cluster_set[0][sort_idx[n+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], sess) + + track_class = track_struct['sub_tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['sub_tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + temp_new_cluster_cost = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + for mm in range(N_cluster): + n = track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n] + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster2.copy() + temp_cluster_set.append(track_id) + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = 
temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + #if cluster2[m] in remove_set: + # remove_flag = 1 + # break + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + if neighbor_flag==0 or conflict_flag==1:# or remove_flag==1: + continue + + # get cost + temp_set = cluster2.copy() + temp_set.append(track_id) + temp_new_cluster_cost[n,0] = comb_cost(temp_set, sess) + + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + + cost_vec = temp_new_cluster_cost[:,0]+new_cluster_cost[0,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost[1,0] = temp_new_cluster_cost[min_idx,0] + change_cluster_idx = [tracklet_mat['track_class'][track_id],min_idx] + temp_set = tracklet_mat['track_cluster'][min_idx].copy() + temp_set.append(track_id) + new_cluster_set.append(temp_set) + + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_merge_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)==1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['sub_tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['sub_tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + new_cluster_cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # check neighbor and conflict track + cluster2 = tracklet_mat['track_cluster'][n].copy() + if len(cluster2)<=1: + continue + + neighbor_flag = 1 + conflict_flag = 0 + #remove_flag = 0 + temp_cluster_set = cluster1+cluster2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for m in range(0, len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[m]] + track_id2 = temp_cluster_set[sort_idx[m+1]] + + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag = 1 + break + + if neighbor_flag==0 or conflict_flag==1:# or remove_flag==1: + continue + + + # get cost + new_cluster_cost_vec[n,0] = comb_cost(cluster1+cluster2, sess) + + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + 
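The loop above leaves new_cluster_cost_vec[n,0] at infinity for every cluster that fails the neighbor/conflict check, so those candidates can never win the argmin taken next. A minimal sketch of that selection step, with made-up cost values:

import numpy as np

# Each entry of new_cluster_cost_vec is the classifier cost of the merged
# cluster; prev_cost_vec holds the summed costs of the two clusters before
# merging. Rejected candidates stay at +inf. Values are illustrative only.
new_cluster_cost_vec = np.array([np.inf, -1.2, 0.4, np.inf])
prev_cost_vec = np.array([0.0, -0.5, -0.9, 0.0])

diff_cost_vec = new_cluster_cost_vec - prev_cost_vec
min_idx = int(np.argmin(diff_cost_vec))  # candidate with the largest cost drop
print(min_idx, diff_cost_vec[min_idx])   # -> 1 -0.7; merging helps only if this is negative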
prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = new_cluster_cost_vec[:,0]-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = new_cluster_cost_vec[min_idx,0] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = cost + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + temp_set = cluster1.copy() + temp_set = temp_set+tracklet_mat['track_cluster'][min_idx] + new_cluster_set.append(temp_set) + new_cluster_set.append([]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_switch_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + track_interval = tracklet_mat['track_interval'].copy() + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + S1 = [] + S2 = [] + for k in range(len(cluster1)): + temp_id = cluster1[k] + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S1.append(temp_id) + else: + S2.append(temp_id) + if len(S1)==0 or len(S2)==0: + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + track_class = track_struct['sub_tracklet_mat']['track_class'][track_id] + t_cluster_idx = track_struct['sub_tracklet_mat']['track_cluster_t_idx'][track_class] + + NN_cluster = len(tracklet_mat['track_cluster']) + cost_vec = float("inf")*np.ones((NN_cluster,1)) + prev_cost_vec = np.zeros((NN_cluster,1)) + new_cluster_cost_vec1 = float("inf")*np.ones((NN_cluster,1)) + new_cluster_cost_vec2 = float("inf")*np.ones((NN_cluster,1)) + cross_cost_vec = np.zeros((NN_cluster,2)) + + track_id_set = [] + for n in range(NN_cluster): + track_id_set.append([]) + + for nn in range(len(t_cluster_idx)): + N_cluster = len(track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]]) + + for mm in range(N_cluster): + n = track_struct['sub_tracklet_mat']['time_cluster'][t_cluster_idx[nn]][mm] + + # the original cluster + if tracklet_mat['track_class'][track_id]==n: + continue + + # switch availability check + S3 = [] + S4 = [] + #remove_flag = 0 + cluster2 = tracklet_mat['track_cluster'][n].copy() + for k in range(len(cluster2)): + temp_id = cluster2[k] + + if tracklet_mat['track_interval'][temp_id,1]<=tracklet_mat['track_interval'][track_id,1]: + S3.append(temp_id) + else: + #******************************************** + if tracklet_mat['track_interval'][temp_id,1] >=tracklet_mat['track_interval'][track_id,1] \ + and tracklet_mat['track_interval'][temp_id,0] <=tracklet_mat['track_interval'][track_id,1]: + if tracklet_mat['track_interval'][temp_id,1] -tracklet_mat['track_interval'][track_id,1] \ + >tracklet_mat['track_interval'][track_id,1]-tracklet_mat['track_interval'][temp_id,0]: + S4.append(temp_id) + else: + S3.append(temp_id) + else: + S4.append(temp_id) + + neighbor_flag1 = 1 + conflict_flag1 = 0 + if len(S3)==0: + neighbor_flag1 = 1 + conflict_flag1 = 0 + else: + temp_cluster_set = S3+S2 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in 
tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag1 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag1 = 1 + break + + + neighbor_flag2 = 1 + conflict_flag2 = 0 + if len(S4)==0: + neighbor_flag2 = 1 + conflict_flag2 = 0 + else: + temp_cluster_set = S4+S1 + sort_idx = np.argsort(track_interval[np.array(temp_cluster_set),1]) + for k in range(0,len(sort_idx)-1): + track_id1 = temp_cluster_set[sort_idx[k]] + track_id2 = temp_cluster_set[sort_idx[k+1]] + if track_id1 not in tracklet_mat['neighbor_track_idx'][track_id2]: + neighbor_flag2 = 0 + break + if track_id1 in tracklet_mat['conflict_track_idx'][track_id2]: + conflict_flag2 = 1 + break + + if neighbor_flag1==0 or conflict_flag1==1 or neighbor_flag2==0 or conflict_flag2==1: + continue + + # get cost + S_1 = S1+S4 + S_2 = S2+S3 + + new_cluster_cost_vec1[n,0] = comb_cost(S_1, sess) + + new_cluster_cost_vec2[n,0] = comb_cost(S_2, sess) + + cost_vec[n,0] = new_cluster_cost_vec1[n,0]+new_cluster_cost_vec2[n,0] + + track_id_set[n].append(S_1.copy()) + track_id_set[n].append(S_2.copy()) + prev_cost_vec[n,0] = tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] \ + +tracklet_mat['cluster_cost'][n] + + cost_vec = cost_vec[:,0]-cross_cost_vec[:,1] + prev_cost_vec = prev_cost_vec[:,0]-cross_cost_vec[:,0] + diff_cost_vec = cost_vec-prev_cost_vec + min_idx = np.argmin(diff_cost_vec) + cost = cost_vec[min_idx] + if cost==float("inf"): + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + diff_cost = diff_cost_vec[min_idx] + new_cluster_cost = np.zeros((2,1)) + new_cluster_cost[0,0] = new_cluster_cost_vec1[min_idx,0] + new_cluster_cost[1,0] = new_cluster_cost_vec2[min_idx,0] + + change_cluster_idx = [tracklet_mat['track_class'][track_id], min_idx] + new_cluster_set = [] + new_cluster_set.append(track_id_set[min_idx][0]) + new_cluster_set.append(track_id_set[min_idx][1]) + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def get_break_cost(track_id, sess): + + global track_struct + tracklet_mat = track_struct['sub_tracklet_mat'] + + new_cluster_cost = np.zeros((2,1)) + cluster1 = tracklet_mat['track_cluster'][tracklet_mat['track_class'][track_id]].copy() + if len(cluster1)<=2: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + # get cost + after_ids = [] + for n in range(len(cluster1)): + if tracklet_mat['track_interval'][cluster1[n],1]>tracklet_mat['track_interval'][track_id,1]: + after_ids.append(cluster1[n]) + + if len(after_ids)==0: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + before_ids = list(set(cluster1)-set(after_ids)) + if len(before_ids)<=1: + cost = float("inf") + diff_cost = float("inf") + new_cluster_cost = [] + new_cluster_set = [] + change_cluster_idx = [] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + + change_cluster_idx = [len(tracklet_mat['track_cluster']), tracklet_mat['track_class'][track_id]] + new_cluster_set = [] + new_cluster_set.append(before_ids) + remain_tracks = after_ids + new_cluster_set.append(remain_tracks) + new_cluster_cost[0,0] = comb_cost(new_cluster_set[0], sess) + + new_cluster_cost[1,0] 
= comb_cost(new_cluster_set[1], sess) + + cost = np.sum(new_cluster_cost) + diff_cost = cost-tracklet_mat['cluster_cost'][tracklet_mat['track_class'][track_id]] + return diff_cost,new_cluster_cost,new_cluster_set,change_cluster_idx + +def copy_sub_mat(): + global track_struct + track_struct['sub_tracklet_mat'] = {} + cand_track_idx = np.where(track_struct['tracklet_mat']['track_id_mat']!=-1)[0] + track_struct['sub_tracklet_mat']['xmin_mat'] = track_struct['tracklet_mat']['xmin_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['ymin_mat'] = track_struct['tracklet_mat']['ymin_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['xmax_mat'] = track_struct['tracklet_mat']['xmax_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['ymax_mat'] = track_struct['tracklet_mat']['ymax_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['x_3d_mat'] = track_struct['tracklet_mat']['x_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['y_3d_mat'] = track_struct['tracklet_mat']['y_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['w_3d_mat'] = track_struct['tracklet_mat']['w_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['h_3d_mat'] = track_struct['tracklet_mat']['h_3d_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['det_score_mat'] = track_struct['tracklet_mat']['det_score_mat'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['track_interval'] = track_struct['tracklet_mat']['track_interval'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['obj_id_mat'] = track_struct['tracklet_mat']['obj_id_mat'][cand_track_idx].copy() + track_struct['sub_tracklet_mat']['track_id_mat'] = track_struct['tracklet_mat']['track_id_mat'][cand_track_idx].copy() + #track_struct['sub_tracklet_mat']['save_obj_id_mask'] = track_struct['tracklet_mat']['save_obj_id_mask'].copy() + #track_struct['sub_tracklet_mat']['assigned_obj_id_mask'] = track_struct['tracklet_mat']['assigned_obj_id_mask'].copy() + + # update comb_track_cost + change_idx = np.zeros(track_struct['track_params']['num_track'], dtype=int) + for n in range(track_struct['track_params']['num_track']): + if track_struct['tracklet_mat']['track_interval'][n,1]-track_struct['tracklet_mat']['track_interval'][n,0] \ + !=track_struct['tracklet_mat']['prev_track_interval'][n,1]-track_struct['tracklet_mat']['prev_track_interval'][n,0] \ + or (track_struct['tracklet_mat']['track_interval'][n,0]==0 + and track_struct['tracklet_mat']['prev_track_interval'][n,0]==0 + and track_struct['tracklet_mat']['track_interval'][n,1]==track_struct['track_params']['num_fr']-1 + and track_struct['tracklet_mat']['prev_track_interval'][n,1]==track_struct['track_params']['num_fr']-1): + change_idx[n] = 1 + + track_struct['tracklet_mat']['comb_track_cost'][change_idx==1,:] = 0 + track_struct['tracklet_mat']['comb_track_cost'][:,change_idx==1] = 0 + track_struct['tracklet_mat']['comb_track_cost_mask'][change_idx==1,:] = 0 + track_struct['tracklet_mat']['comb_track_cost_mask'][:,change_idx==1] = 0 + + temp_mat = track_struct['tracklet_mat']['comb_track_cost'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['comb_track_cost'] = temp_mat[:,cand_track_idx].copy() + + temp_mat = track_struct['tracklet_mat']['comb_track_cost_mask'][cand_track_idx,:].copy() + track_struct['sub_tracklet_mat']['comb_track_cost_mask'] = temp_mat[:,cand_track_idx].copy() + + fea_cand_idx = np.where(track_struct['tracklet_mat']['appearance_fea_mat'][:,0]!=-1)[0] + 
track_struct['sub_tracklet_mat']['appearance_fea_mat'] = track_struct['tracklet_mat']['appearance_fea_mat'][fea_cand_idx,:].copy() + + # update track_id for sub_tracklet_mat + for n in range(len(cand_track_idx)): + temp_idx = np.where(track_struct['sub_tracklet_mat']['appearance_fea_mat'][:,0]==cand_track_idx[n])[0] + track_struct['sub_tracklet_mat']['appearance_fea_mat'][temp_idx,0] = n + + return + +def init_clustering(): + + global track_struct + + # copy the sub tracklet_mat + copy_sub_mat() + + N_tracklet = track_struct['sub_tracklet_mat']['xmin_mat'].shape[0] + + # track cluster + track_struct['sub_tracklet_mat']['track_cluster'] = [] + + # track class + track_struct['sub_tracklet_mat']['track_class'] = np.arange(N_tracklet, dtype=int) + + # time cluster + track_struct['sub_tracklet_mat']['time_cluster'] = [] + for n in range(track_struct['track_params']['num_time_cluster']): + track_struct['sub_tracklet_mat']['time_cluster'].append([]) + + track_struct['sub_tracklet_mat']['track_cluster_t_idx'] = [] + for n in range(N_tracklet): + idx = np.where(track_struct['sub_tracklet_mat']['xmin_mat'][n,:]!=-1)[0] + track_struct['sub_tracklet_mat']['track_interval'][n,0] = np.min(idx) + track_struct['sub_tracklet_mat']['track_interval'][n,1] = np.max(idx) + track_struct['sub_tracklet_mat']['track_cluster'].append([n]) + + if n in remove_set: + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append([-1]) + else: + min_time_cluster_idx = int(np.floor(max(track_struct['sub_tracklet_mat']['track_interval'][n,0] + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(track_struct['sub_tracklet_mat']['track_interval'][n,1] + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['sub_tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + for k in range(min_time_cluster_idx,max_time_cluster_idx+1): + track_struct['sub_tracklet_mat']['time_cluster'][k].append(n) + + # get center position of each detection location + mask = track_struct['sub_tracklet_mat']['xmin_mat']==-1 + track_struct['sub_tracklet_mat']['center_x'] = \ + (track_struct['sub_tracklet_mat']['xmin_mat']+track_struct['sub_tracklet_mat']['xmax_mat'])/2 + track_struct['sub_tracklet_mat']['center_y'] = \ + (track_struct['sub_tracklet_mat']['ymin_mat']+track_struct['sub_tracklet_mat']['ymax_mat'])/2 + track_struct['sub_tracklet_mat']['w'] = \ + track_struct['sub_tracklet_mat']['xmax_mat']-track_struct['sub_tracklet_mat']['xmin_mat']+1 + track_struct['sub_tracklet_mat']['h'] = \ + track_struct['sub_tracklet_mat']['ymax_mat']-track_struct['sub_tracklet_mat']['ymin_mat']+1 + track_struct['sub_tracklet_mat']['center_x'][mask] = -1 + track_struct['sub_tracklet_mat']['center_y'][mask] = -1 + track_struct['sub_tracklet_mat']['w'][mask] = -1 + track_struct['sub_tracklet_mat']['h'][mask] = -1 + + # neighbor_track_idx and conflict_track_idx + track_struct['sub_tracklet_mat']['neighbor_track_idx'] = [] + track_struct['sub_tracklet_mat']['conflict_track_idx'] = [] + for n in range(N_tracklet): + track_struct['sub_tracklet_mat']['neighbor_track_idx'].append([]) + track_struct['sub_tracklet_mat']['conflict_track_idx'].append([]) + for n in range(N_tracklet-1): + for m in range(n+1, N_tracklet): + t_min1 = track_struct['sub_tracklet_mat']['track_interval'][n,0] + t_max1 = 
track_struct['sub_tracklet_mat']['track_interval'][n,1] + t_min2 = track_struct['sub_tracklet_mat']['track_interval'][m,0] + t_max2 = track_struct['sub_tracklet_mat']['track_interval'][m,1] + overlap_len = min(t_max2,t_max1)-max(t_min1,t_min2)+1 + overlap_r = overlap_len/(t_max1-t_min1+1+t_max2-t_min2+1-overlap_len) + if overlap_len>0 and overlap_r>track_struct['track_params']['track_overlap_thresh']: + track_struct['sub_tracklet_mat']['conflict_track_idx'][n].append(m) + track_struct['sub_tracklet_mat']['conflict_track_idx'][m].append(n) + continue + if overlap_len>0 and overlap_r<=track_struct['track_params']['track_overlap_thresh']: + # check the search region + t1 = int(max(t_min1,t_min2)) + t2 = int(min(t_max2,t_max1)) + if (t_min1<=t_min2 and t_max1>=t_max2) or (t_min1>=t_min2 and t_max1<=t_max2) or overlap_len>4: + track_struct['sub_tracklet_mat']['conflict_track_idx'][n].append(m) + track_struct['sub_tracklet_mat']['conflict_track_idx'][m].append(n) + continue + + cand_t = np.array(range(t1,t2+1)) + dist_x = abs(track_struct['sub_tracklet_mat']['center_x'][n,cand_t] \ + -track_struct['sub_tracklet_mat']['center_x'][m,cand_t]) + dist_y = abs(track_struct['sub_tracklet_mat']['center_y'][n,cand_t] \ + -track_struct['sub_tracklet_mat']['center_y'][m,cand_t]) + w1 = track_struct['sub_tracklet_mat']['w'][n,cand_t] + h1 = track_struct['sub_tracklet_mat']['h'][n,cand_t] + w2 = track_struct['sub_tracklet_mat']['w'][m,cand_t] + h2 = track_struct['sub_tracklet_mat']['h'][m,cand_t] + + min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)]) + min_dist_x1 = np.min(dist_x/min_len) + min_dist_y1 = np.min(dist_y/min_len) + min_dist_x2 = np.min(dist_x/min_len) + min_dist_y2 = np.min(dist_y/min_len) + if min_dist_x1=t_max2: + t1 = int(t_min1) + t2 = int(t_max2) + else: + t1 = int(t_max1) + t2 = int(t_min2) + + #*********************************** + tr_t1 = np.array(range(int(t_min1),int(t_max1+1))) + tr_x1 = track_struct['sub_tracklet_mat']['center_x'][n,int(t_min1):int(t_max1+1)] + tr_y1 = track_struct['sub_tracklet_mat']['center_y'][n,int(t_min1):int(t_max1+1)] + if len(tr_t1)>10: + if t_min1>=t_max2: + tr_t1 = tr_t1[0:10] + tr_x1 = tr_x1[0:10] + tr_y1 = tr_y1[0:10] + else: + tr_t1 = tr_t1[-10:] + tr_x1 = tr_x1[-10:] + tr_y1 = tr_y1[-10:] + ts_x1 = track_lib.linear_pred_v2(tr_t1, tr_x1, np.array([t2])) + ts_y1 = track_lib.linear_pred_v2(tr_t1, tr_y1, np.array([t2])) + dist_x1 = abs(ts_x1[0]-track_struct['sub_tracklet_mat']['center_x'][m,t2]) + dist_y1 = abs(ts_y1[0]-track_struct['sub_tracklet_mat']['center_y'][m,t2]) + + tr_t2 = np.array(range(int(t_min2),int(t_max2+1))) + tr_x2 = track_struct['sub_tracklet_mat']['center_x'][m,int(t_min2):int(t_max2+1)] + tr_y2 = track_struct['sub_tracklet_mat']['center_y'][m,int(t_min2):int(t_max2+1)] + if len(tr_t2)>10: + if t_min2>t_max1: + tr_t2 = tr_t2[0:10] + tr_x2 = tr_x2[0:10] + tr_y2 = tr_y2[0:10] + else: + tr_t2 = tr_t2[-10:] + tr_x2 = tr_x2[-10:] + tr_y2 = tr_y2[-10:] + + ts_x2 = track_lib.linear_pred_v2(tr_t2, tr_x2, np.array([t1])) + ts_y2 = track_lib.linear_pred_v2(tr_t2, tr_y2, np.array([t1])) + dist_x2 = abs(ts_x2[0]-track_struct['sub_tracklet_mat']['center_x'][n,t1]) + dist_y2 = abs(ts_y2[0]-track_struct['sub_tracklet_mat']['center_y'][n,t1]) + + dist_x = min(dist_x1, dist_x2) + dist_y = min(dist_y1, dist_y2) + #*********************************** + + + w1 = track_struct['sub_tracklet_mat']['w'][n,t1] + h1 = track_struct['sub_tracklet_mat']['h'][n,t1] + w2 = track_struct['sub_tracklet_mat']['w'][m,t2] + h2 = 
track_struct['sub_tracklet_mat']['h'][m,t2] + + min_len = np.min([np.min(w1),np.min(h1),np.min(w2),np.min(h2)]) + min_dist_x1 = dist_x/min_len + min_dist_y1 = dist_y/min_len + min_dist_x2 = dist_x/min_len + min_dist_y2 = dist_y/min_len + + if min_dist_x1=0: + continue + + change_flag = 1 + + #**************** + #import pdb; pdb.set_trace() + print(min_idx) + print(new_set) + new_t_idx = [] + if len(new_set[min_idx][0])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][0]),1)) + t_max_array = np.zeros((len(new_set[min_idx][0]),1)) + for m in range(len(new_set[min_idx][0])): + t_min_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][0][m],0] + t_max_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][0][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['sub_tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if len(new_set[min_idx][1])==0: + new_t_idx.append([-1]) + else: + t_min_array = np.zeros((len(new_set[min_idx][1]),1)) + t_max_array = np.zeros((len(new_set[min_idx][1]),1)) + for m in range(len(new_set[min_idx][1])): + t_min_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][1][m],0] + t_max_array[m,0] = track_struct['sub_tracklet_mat']['track_interval'][new_set[min_idx][1][m],1] + + min_time_cluster_idx = int(np.floor(max(np.min(t_min_array) + -track_struct['track_params']['t_dist_thresh']-5,0) + /track_struct['track_params']['time_cluster_dist'])) + max_time_cluster_idx = int(np.floor(min(np.max(t_max_array) + +track_struct['track_params']['t_dist_thresh']+5, + track_struct['sub_tracklet_mat']['xmin_mat'].shape[1]-1) + /track_struct['track_params']['time_cluster_dist'])) + new_t_idx.append(list(range(min_time_cluster_idx,max_time_cluster_idx+1))) + + if change_idx[min_idx][0]>=len(track_struct['sub_tracklet_mat']['track_cluster']): + for m in range(len(track_struct['sub_tracklet_mat']['track_cluster']),change_idx[min_idx][0]): + track_struct['sub_tracklet_mat']['track_cluster'].append([]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['sub_tracklet_mat']['track_cluster'].append(new_set[min_idx][0]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[0]) + else: + track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][0]] = new_set[min_idx][0] + track_struct['sub_tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][0]] = new_t_idx[0] + + if change_idx[min_idx][1]>=len(track_struct['sub_tracklet_mat']['track_cluster']): + for m in range(len(track_struct['sub_tracklet_mat']['track_cluster']),change_idx[min_idx][1]): + track_struct['sub_tracklet_mat']['track_cluster'].append([]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append([-1]) + track_struct['sub_tracklet_mat']['track_cluster'].append(new_set[min_idx][1]) + track_struct['sub_tracklet_mat']['track_cluster_t_idx'].append(new_t_idx[1]) + else: + track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][1]] = new_set[min_idx][1] + track_struct['sub_tracklet_mat']['track_cluster_t_idx'][change_idx[min_idx][1]] = new_t_idx[1] + + 
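The padded time-bin computation above is the same one used when the clusters are first built in init_clustering. A minimal sketch of it as a standalone helper (the name interval_to_time_bins is hypothetical; the parameters correspond to entries of track_struct['track_params']):

import numpy as np

# A cluster spanning frames [t_min, t_max] is registered in every
# time-cluster bin touched by its interval padded with t_dist_thresh+5
# frames on each side, clipped to [0, num_fr-1].
def interval_to_time_bins(t_min, t_max, t_dist_thresh, time_cluster_dist, num_fr):
    lo = int(np.floor(max(t_min - t_dist_thresh - 5, 0) / time_cluster_dist))
    hi = int(np.floor(min(t_max + t_dist_thresh + 5, num_fr - 1) / time_cluster_dist))
    return list(range(lo, hi + 1))

# e.g. with t_dist_thresh=30, time_cluster_dist=100, num_fr=500:
# frames 40..55 -> [0], frames 140..260 -> [1, 2]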
#import pdb; pdb.set_trace() + for m in range(track_struct['track_params']['num_time_cluster']): + #import pdb; pdb.set_trace() + if change_idx[min_idx][0] in track_struct['sub_tracklet_mat']['time_cluster'][m]: + track_struct['sub_tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][0]) + if change_idx[min_idx][1] in track_struct['sub_tracklet_mat']['time_cluster'][m]: + track_struct['sub_tracklet_mat']['time_cluster'][m].remove(change_idx[min_idx][1]) + + for m in range(track_struct['track_params']['num_time_cluster']): + if m in new_t_idx[0]: + track_struct['sub_tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][0]) + if m in new_t_idx[1]: + track_struct['sub_tracklet_mat']['time_cluster'][m].append(change_idx[min_idx][1]) + + if change_idx[min_idx][0]>=len(track_struct['sub_tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['sub_tracklet_mat']['cluster_cost']),change_idx[min_idx][0]): + track_struct['sub_tracklet_mat']['cluster_cost'].append(0) + track_struct['sub_tracklet_mat']['cluster_cost'].append(new_C[min_idx][0]) + else: + track_struct['sub_tracklet_mat']['cluster_cost'][change_idx[min_idx][0]] = new_C[min_idx][0] + + if change_idx[min_idx][1]>=len(track_struct['sub_tracklet_mat']['cluster_cost']): + for m in range(len(track_struct['sub_tracklet_mat']['cluster_cost']),change_idx[min_idx][1]): + track_struct['sub_tracklet_mat']['cluster_cost'].append([]) + track_struct['sub_tracklet_mat']['cluster_cost'].append(new_C[min_idx][1]) + else: + track_struct['sub_tracklet_mat']['cluster_cost'][change_idx[min_idx][1]] = new_C[min_idx][1] + + for k in range(len(track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][0]])): + track_struct['sub_tracklet_mat']['track_class'][track_struct['sub_tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][0]][k]] = change_idx[min_idx][0] + + for k in range(len(track_struct['sub_tracklet_mat']['track_cluster'][change_idx[min_idx][1]])): + track_struct['sub_tracklet_mat']['track_class'][track_struct['sub_tracklet_mat'] \ + ['track_cluster'][change_idx[min_idx][1]][k]] = change_idx[min_idx][1] + #import pdb; pdb.set_trace() + return change_flag + +def feature_encode(sess, image_paths, batch_size): + + # Run forward pass to calculate embeddings + #print('Runnning forward pass on LFW images') + + use_flipped_images = False + use_fixed_image_standardization = False + use_random_rotate = False + use_radnom_crop = False + # Enqueue one epoch of image paths and labels + nrof_embeddings = len(image_paths) # nrof_pairs * nrof_images_per_pair + nrof_flips = 2 if use_flipped_images else 1 + nrof_images = nrof_embeddings * nrof_flips + labels_array = np.expand_dims(np.arange(0,nrof_images),1) + image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1) + control_array = np.zeros_like(labels_array, np.int32) + + if use_fixed_image_standardization: + control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION + if use_flipped_images: + # Flip every second image + control_array += (labels_array % 2)*facenet.FLIP + if use_random_rotate: + control_array += facenet.RANDOM_ROTATE + if use_radnom_crop: + control_array += facenet.RANDOM_CROP + + sess.run(eval_enqueue_op, {image_paths_placeholder: image_paths_array, + labels_placeholder: labels_array, control_placeholder: control_array}) + + embedding_size = int(embeddings.get_shape()[1]) + assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' + nrof_batches = nrof_images // 
batch_size + emb_array = np.zeros((nrof_images, embedding_size)) + lab_array = np.zeros((nrof_images,)) + for i in range(nrof_batches): + feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size} + emb, lab = sess.run([embeddings, label_batch], feed_dict=feed_dict) + lab_array[lab] = lab + emb_array[lab, :] = emb + if i % 10 == 9: + print('.', end='') + sys.stdout.flush() + #import pdb; pdb.set_trace() + #np.savetxt("emb_array.csv", emb_array, delimiter=",") + return emb_array + +def crop_det(det_M, img): + global track_struct + crop_det_folder = track_struct['file_path']['crop_det_folder'] + crop_size = track_struct['track_params']['crop_size'] + if not os.path.isdir(crop_det_folder): + os.makedirs(crop_det_folder) + + save_patch_list = [] + for n in range(len(det_M)): + xmin = int(max(0,det_M[n,1])) + xmax = int(min(img.shape[1]-1,det_M[n,1]+det_M[n,3])) + ymin = int(max(0,det_M[n,2])) + ymax = int(min(img.shape[0]-1,det_M[n,2]+det_M[n,4])) + img_patch = img[ymin:ymax,xmin:xmax,:] + img_patch = misc.imresize(img_patch, size=[crop_size,crop_size]) + patch_name = track_lib.file_name(n,4)+'.png' + save_path = crop_det_folder+'/'+patch_name + misc.imsave(save_path, img_patch) + save_patch_list.append(save_path) + + return save_patch_list + +def init_tracklet_model(): + global track_struct + global tracklet_graph + global tracklet_sess + + global batch_X_x + global batch_X_y + global batch_X_w + global batch_X_h + global batch_X_a + global batch_mask_1 + global batch_mask_2 + global batch_Y + global keep_prob + global y_conv + + max_length = track_struct['track_params']['max_length'] + batch_size = track_struct['track_params']['batch_size'] + feature_size = track_struct['track_params']['feature_size'] + num_classes = track_struct['track_params']['num_classes'] + + # build tracklet graph + tracklet_graph = tf.Graph() + with tracklet_graph.as_default(): + # load nn + batch_X_x = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_y = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_w = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_h = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_a = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 1]) + batch_mask_1 = tf.placeholder(tf.float32, [None, 1, max_length, 2]) + batch_mask_2 = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 2]) + batch_Y = tf.placeholder(tf.int32, [None, num_classes]) + keep_prob = tf.placeholder(tf.float32) + + y_conv = seq_nn_3d.seq_nn(batch_X_x,batch_X_y,batch_X_w,batch_X_h,batch_X_a,batch_mask_1, + batch_mask_2,batch_Y,max_length,feature_size,keep_prob) + + cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=batch_Y, logits=y_conv)) + train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) + correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(batch_Y, 1)) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + + tracklet_init = tf.global_variables_initializer() + tracklet_saver = tf.train.Saver() + + tracklet_sess = tf.Session(graph=tracklet_graph) + with tracklet_sess.as_default(): + tracklet_saver.restore(tracklet_sess, track_struct['file_path']['seq_model']) + print("Tracklet model restored.") + return + +def init_triplet_model(): + global track_struct + global triplet_graph + global triplet_sess + + global eval_enqueue_op + global image_paths_placeholder + global labels_placeholder + global phase_train_placeholder + global batch_size_placeholder + global 
control_placeholder + global embeddings + global label_batch + global distance_metric + f_image_size = 160 + distance_metric = 0 + + triplet_graph = tf.Graph() + with triplet_graph.as_default(): + image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') + labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') + batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') + control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') + phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') + + nrof_preprocess_threads = 4 + image_size = (f_image_size, f_image_size) + eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000, + dtypes=[tf.string, tf.int32, tf.int32], + shapes=[(1,), (1,), (1,)], + shared_name=None, name=None) + eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder, + labels_placeholder, control_placeholder], + name='eval_enqueue_op') + image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size, + nrof_preprocess_threads, batch_size_placeholder) + triplet_sess = tf.Session(graph=triplet_graph) + with triplet_sess.as_default(): + with triplet_graph.as_default(): + # Load the model + input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder} + facenet.load_model(track_struct['file_path']['triplet_model'], input_map=input_map) + + # Get output tensor + embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") + coord = tf.train.Coordinator() + tf.train.start_queue_runners(coord=coord, sess=triplet_sess) + return + +def TC_online(det_M, img, t_pointer, fr_idx): + global track_struct + global triplet_graph + global triplet_sess + global tracklet_graph + global tracklet_sess + + num_bbox = len(det_M) + track_struct['track_params']['img_size'] = img.shape + track_struct['tracklet_mat']['imgs'].append(img) + + # last frame in the time window + max_track_id = np.max(track_struct['tracklet_mat']['track_id_mat']) + if t_pointer==track_struct['track_params']['num_fr']: + + # save tracking to file + # fr_id, obj_id, track_id, x, y, w, h, x_3d, y_3d, w_3d, h_3d, det_score + track_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][:,0]!=-1)[0] + num_save_id = len(track_idx) + if num_save_id!=0: + save_mat = np.zeros((num_save_id, 12)) + save_mat[:,0] = fr_idx + save_mat[:,1] = track_struct['tracklet_mat']['obj_id_mat'][track_idx] + track_struct['tracklet_mat']['save_obj_id_mask'][save_mat[:,1].astype(int)] = 1 + save_mat[:,2] = track_struct['tracklet_mat']['track_id_mat'][track_idx] + save_mat[:,3] = track_struct['tracklet_mat']['xmin_mat'][track_idx,0] + save_mat[:,4] = track_struct['tracklet_mat']['ymin_mat'][track_idx,0] + save_mat[:,5] = track_struct['tracklet_mat']['xmax_mat'][track_idx,0] \ + -track_struct['tracklet_mat']['xmin_mat'][track_idx,0] + save_mat[:,6] = track_struct['tracklet_mat']['ymax_mat'][track_idx,0] \ + -track_struct['tracklet_mat']['ymin_mat'][track_idx,0] + save_mat[:,7] = track_struct['tracklet_mat']['x_3d_mat'][track_idx,0] + save_mat[:,8] = track_struct['tracklet_mat']['y_3d_mat'][track_idx,0] + save_mat[:,9] = track_struct['tracklet_mat']['w_3d_mat'][track_idx,0] + save_mat[:,10] = track_struct['tracklet_mat']['h_3d_mat'][track_idx,0] + save_mat[:,11] = track_struct['tracklet_mat']['det_score_mat'][track_idx,0] + f = open(track_struct['file_path']['txt_result_path'], 'a') + np.savetxt(f, save_mat, delimiter=',') + else: + save_mat = [] + + 
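Rows are flushed one frame at a time as the window slides, so the text file accumulates in frame order with the twelve columns listed in the comment above. A minimal sketch of reading it back (the file name is a placeholder for track_struct['file_path']['txt_result_path']):

import numpy as np

# Each row: fr_id, obj_id, track_id, x, y, w, h, x_3d, y_3d, w_3d, h_3d,
# det_score, comma-separated as written by np.savetxt above.
result = np.loadtxt('tracking_result.txt', delimiter=',', ndmin=2)
fr_ids = result[:, 0].astype(int)
obj_ids = result[:, 1].astype(int)
boxes = result[:, 3:7]  # x, y, w, h per detection
print(len(np.unique(obj_ids)), 'objects over', len(np.unique(fr_ids)), 'frames')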
draw_result(track_struct['tracklet_mat']['imgs'][0], save_mat, fr_idx-track_struct['track_params']['num_fr']) + del track_struct['tracklet_mat']['imgs'][0] + + # Slide the time window + track_struct['tracklet_mat']['xmin_mat'][:,:-1] = track_struct['tracklet_mat']['xmin_mat'][:,1:] + track_struct['tracklet_mat']['xmin_mat'][:,-1] = -1 + track_struct['tracklet_mat']['ymin_mat'][:,:-1] = track_struct['tracklet_mat']['ymin_mat'][:,1:] + track_struct['tracklet_mat']['ymin_mat'][:,-1] = -1 + track_struct['tracklet_mat']['xmax_mat'][:,:-1] = track_struct['tracklet_mat']['xmax_mat'][:,1:] + track_struct['tracklet_mat']['xmax_mat'][:,-1] = -1 + track_struct['tracklet_mat']['ymax_mat'][:,:-1] = track_struct['tracklet_mat']['ymax_mat'][:,1:] + track_struct['tracklet_mat']['ymax_mat'][:,-1] = -1 + track_struct['tracklet_mat']['x_3d_mat'][:,:-1] = track_struct['tracklet_mat']['x_3d_mat'][:,1:] + track_struct['tracklet_mat']['x_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['y_3d_mat'][:,:-1] = track_struct['tracklet_mat']['y_3d_mat'][:,1:] + track_struct['tracklet_mat']['y_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['w_3d_mat'][:,:-1] = track_struct['tracklet_mat']['w_3d_mat'][:,1:] + track_struct['tracklet_mat']['w_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['h_3d_mat'][:,:-1] = track_struct['tracklet_mat']['h_3d_mat'][:,1:] + track_struct['tracklet_mat']['h_3d_mat'][:,-1] = -1 + track_struct['tracklet_mat']['det_score_mat'][:,:-1] = track_struct['tracklet_mat']['det_score_mat'][:,1:] + track_struct['tracklet_mat']['det_score_mat'][:,-1] = -1 + track_struct['tracklet_mat']['track_interval'] = track_struct['tracklet_mat']['track_interval']-1 + track_struct['tracklet_mat']['track_interval'][track_struct['tracklet_mat']['track_interval'][:,0]<0,0] = 0 + track_struct['tracklet_mat']['track_interval'][track_struct['tracklet_mat']['track_interval'][:,1]<0,0] = -1 + track_struct['tracklet_mat']['track_interval'][track_struct['tracklet_mat']['track_interval'][:,1]<0,1] = -1 + + track_struct['tracklet_mat']['obj_id_mat'][track_struct['tracklet_mat']['track_interval'][:,1]==-1] = -1 + track_struct['tracklet_mat']['track_id_mat'][track_struct['tracklet_mat']['track_interval'][:,1]==-1] = -1 + + t_pointer = t_pointer-1 + + remove_fr_idx = fr_idx-track_struct['track_params']['num_fr'] + remove_fea_idx = np.where(track_struct['tracklet_mat']['appearance_fea_mat'][:,1]==remove_fr_idx)[0] + track_struct['tracklet_mat']['appearance_fea_mat'][remove_fea_idx,:] = -1 + + track_struct['tracklet_mat']['assigned_obj_id_mask'] = track_struct['tracklet_mat']['save_obj_id_mask'].copy() + assigned_ids = track_struct['tracklet_mat']['obj_id_mat'][track_struct['tracklet_mat']['obj_id_mat']!=-1] + track_struct['tracklet_mat']['assigned_obj_id_mask'][assigned_ids] = 1 + avai_ids = np.where(track_struct['tracklet_mat']['assigned_obj_id_mask']==0)[0] + + empty_idx = np.where(track_struct['tracklet_mat']['track_id_mat']==-1)[0] + empty_fea_idx = np.where(track_struct['tracklet_mat']['appearance_fea_mat'][:,0]==-1)[0] + + # crop detection results and extract cnn features + if num_bbox!=0: + patch_list = crop_det(det_M, img) + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],2:] \ + = 10*feature_encode(triplet_sess, patch_list, len(patch_list)) + + # remove folder + shutil.rmtree(track_struct['file_path']['crop_det_folder']) + + # Forward tracking + if t_pointer==0 and num_bbox!=0: + track_struct['tracklet_mat']['obj_id_mat'][empty_idx[0:num_bbox]] = avai_ids[0:num_bbox] + 
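New tracklets are seeded straight from the detection matrix, so the column layout of det_M matters here; a toy example in the layout this block consumes (all values made up):

import numpy as np

# Column 0 is the frame id, columns 1-4 are x, y, w, h of the 2D box,
# column 5 is the detection score, and columns 6-9 are the 3D box
# (x_3d, y_3d, w_3d, h_3d).
det_M = np.array([
    [12, 100., 200., 50., 120., 0.9, 1.0, 2.0, 0.5, 1.8],
    [12, 300., 180., 45., 110., 0.8, 3.1, 1.9, 0.5, 1.7],
])
xmax = det_M[:, 1] + det_M[:, 3]  # xmin + w, as used to fill xmax_mat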
track_struct['tracklet_mat']['track_id_mat'][empty_idx[0:num_bbox]] = np.array(range(num_bbox),dtype=int)+max_track_id+1 + track_struct['tracklet_mat']['xmin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1] + track_struct['tracklet_mat']['ymin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2] + track_struct['tracklet_mat']['xmax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1]+det_M[:,3] + track_struct['tracklet_mat']['ymax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2]+det_M[:,4] + track_struct['tracklet_mat']['x_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,6] + track_struct['tracklet_mat']['y_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,7] + track_struct['tracklet_mat']['w_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,8] + track_struct['tracklet_mat']['h_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,9] + track_struct['tracklet_mat']['det_score_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,5] + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],0] = t_pointer + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],1] = t_pointer + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],0] = empty_idx[0:num_bbox] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],1] = fr_idx + + elif t_pointer!=0 and num_bbox!=0: + prev_bbox_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][:,t_pointer-1]!=-1)[0] + prev_num_bbox = len(prev_bbox_idx) + if prev_num_bbox==0: + track_struct['tracklet_mat']['obj_id_mat'][empty_idx[0:num_bbox]] = avai_ids[0:num_bbox] + track_struct['tracklet_mat']['track_id_mat'][empty_idx[0:num_bbox]] = np.array(range(num_bbox),dtype=int)+max_track_id+1 + track_struct['tracklet_mat']['xmin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1] + track_struct['tracklet_mat']['ymin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2] + track_struct['tracklet_mat']['xmax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1]+det_M[:,3] + track_struct['tracklet_mat']['ymax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2]+det_M[:,4] + track_struct['tracklet_mat']['x_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,6] + track_struct['tracklet_mat']['y_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,7] + track_struct['tracklet_mat']['w_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,8] + track_struct['tracklet_mat']['h_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,9] + track_struct['tracklet_mat']['det_score_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,5] + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],0] = t_pointer + track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],1] = t_pointer + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],0] = empty_idx[0:num_bbox] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],1] = fr_idx + else: + # predict bbox location + bbox1 = np.zeros((prev_num_bbox,4)) + bbox1[:,0] = track_struct['tracklet_mat']['xmin_mat'][prev_bbox_idx,t_pointer-1] + bbox1[:,1] = track_struct['tracklet_mat']['ymin_mat'][prev_bbox_idx,t_pointer-1] + bbox1[:,2] = track_struct['tracklet_mat']['xmax_mat'][prev_bbox_idx,t_pointer-1] \ + -track_struct['tracklet_mat']['xmin_mat'][prev_bbox_idx,t_pointer-1]+1 + bbox1[:,3] = track_struct['tracklet_mat']['ymax_mat'][prev_bbox_idx,t_pointer-1] \ + -track_struct['tracklet_mat']['ymin_mat'][prev_bbox_idx,t_pointer-1]+1 + pred_bbox1 = np.zeros((prev_num_bbox,4)) + + bbox2 
= np.zeros((num_bbox,4))
+            bbox2[:,:] = det_M[:,1:5]
+
+            # bbox association: match linearly predicted boxes of live tracks
+            # against the new detections
+            for k in range(prev_num_bbox):
+                temp_track_id = prev_bbox_idx[k]
+                t_idx = np.where(track_struct['tracklet_mat']['xmin_mat'][temp_track_id,:]!=-1)[0]
+                t_min = np.min(t_idx)
+                # ---- NOTE: the original block from here to the gating line below
+                # was corrupted in this diff. From what survives, it clamps t_min to
+                # the last linear_pred_thresh frames, linearly extrapolates each
+                # live track to fill pred_bbox1[k,:], and builds the IOU matrix
+                # `overlap_mat` (pred_bbox1 vs bbox2) plus an appearance distance
+                # `color_dist` between the associated patches; both names are
+                # inferred from the surviving fragments. ----
+            overlap_mat[color_dist>track_struct['track_params']['color_thresh']] = 0
+            idx1, idx2 = track_lib.bbox_associate(overlap_mat, track_struct['track_params']['IOU_thresh'])
+            #if fr_idx==14:
+            #    import pdb; pdb.set_trace()
+
+            # assign the tracklet_mat
+            if len(idx1)==0:
+                track_struct['tracklet_mat']['obj_id_mat'][empty_idx[0:num_bbox]] = avai_ids[0:num_bbox]
+                track_struct['tracklet_mat']['track_id_mat'][empty_idx[0:num_bbox]] = np.array(range(num_bbox),dtype=int)+max_track_id+1
+                track_struct['tracklet_mat']['xmin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1]
+                track_struct['tracklet_mat']['ymin_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2]
+                track_struct['tracklet_mat']['xmax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,1]+det_M[:,3]
+                track_struct['tracklet_mat']['ymax_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,2]+det_M[:,4]
+                track_struct['tracklet_mat']['x_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,6]
+                track_struct['tracklet_mat']['y_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,7]
+                track_struct['tracklet_mat']['w_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,8]
+                track_struct['tracklet_mat']['h_3d_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,9]
+                track_struct['tracklet_mat']['det_score_mat'][empty_idx[0:num_bbox],t_pointer] = det_M[:,5]
+                track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],0] = t_pointer
+                track_struct['tracklet_mat']['track_interval'][empty_idx[0:num_bbox],1] = t_pointer
+                track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],0] = empty_idx[0:num_bbox]
+                track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[0:len(patch_list)],1] = fr_idx
+            else:
+                cnt1 = -1
+                cnt2 = -1
+                for n in range(num_bbox):
+                    if n not in list(idx2):
+                        # unmatched detection: start a new track
+                        cnt1 = cnt1+1
+                        track_struct['tracklet_mat']['obj_id_mat'][empty_idx[cnt1]] \
+                            = avai_ids[cnt1]
+                        track_struct['tracklet_mat']['track_id_mat'][empty_idx[cnt1]] \
+                            = cnt1+max_track_id+1
+                        track_struct['tracklet_mat']['xmin_mat'][empty_idx[cnt1],t_pointer] = det_M[n,1]
+                        track_struct['tracklet_mat']['ymin_mat'][empty_idx[cnt1],t_pointer] = det_M[n,2]
+                        track_struct['tracklet_mat']['xmax_mat'][empty_idx[cnt1],t_pointer] = det_M[n,1]+det_M[n,3]
+                        track_struct['tracklet_mat']['ymax_mat'][empty_idx[cnt1],t_pointer] = det_M[n,2]+det_M[n,4]
+                        track_struct['tracklet_mat']['x_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,6]
+                        track_struct['tracklet_mat']['y_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,7]
+                        track_struct['tracklet_mat']['w_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,8]
+                        track_struct['tracklet_mat']['h_3d_mat'][empty_idx[cnt1],t_pointer] = det_M[n,9]
+                        track_struct['tracklet_mat']['det_score_mat'][empty_idx[cnt1],t_pointer] = det_M[n,5]
+                        track_struct['tracklet_mat']['track_interval'][empty_idx[cnt1],0] = t_pointer
+                        track_struct['tracklet_mat']['track_interval'][empty_idx[cnt1],1] = t_pointer
+                        track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],0] = empty_idx[cnt1]
+                        track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],1] = fr_idx
+                    else:
+                        # matched detection: extend the associated previous track
+                        temp_idx = np.where(idx2==n)[0]
+                        track_struct['tracklet_mat']['xmin_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,1]
+                        track_struct['tracklet_mat']['ymin_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = 
det_M[n,2] + track_struct['tracklet_mat']['xmax_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,1]+det_M[n,3] + track_struct['tracklet_mat']['ymax_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,2]+det_M[n,4] + track_struct['tracklet_mat']['x_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,6] + track_struct['tracklet_mat']['y_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,7] + track_struct['tracklet_mat']['w_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,8] + track_struct['tracklet_mat']['h_3d_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,9] + track_struct['tracklet_mat']['det_score_mat'][prev_bbox_idx[idx1[temp_idx[0]]],t_pointer] = det_M[n,5] + track_struct['tracklet_mat']['track_interval'][prev_bbox_idx[idx1[temp_idx[0]]],1] = t_pointer + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],0] = prev_bbox_idx[idx1[temp_idx[0]]] + track_struct['tracklet_mat']['appearance_fea_mat'][empty_fea_idx[n],1] = fr_idx + + track_struct['tracklet_mat']['assigned_obj_id_mask'] = track_struct['tracklet_mat']['save_obj_id_mask'].copy() + assigned_ids = track_struct['tracklet_mat']['obj_id_mat'][track_struct['tracklet_mat']['obj_id_mat']!=-1] + track_struct['tracklet_mat']['assigned_obj_id_mask'][assigned_ids] = 1 + avai_ids = np.where(track_struct['tracklet_mat']['assigned_obj_id_mask']==0)[0] + + # Tracklet clustering + + iters = 20 + if fr_idx%track_struct['track_params']['clustering_period']==track_struct['track_params']['clustering_period']-1: + for n in range(iters): + print("iteration") + print(n) + change_flag = tracklet_clustering(tracklet_sess, n) + if change_flag==0: + #import pdb; pdb.set_trace() + #time_check_flag = time_cluster_check() + break + + # Update tracklet + post_processing() + + #import pdb; pdb.set_trace() + + return + +def init_TC_tracker(): + global track_struct + + track_struct = {'track_params':{}, 'file_path':{}} + track_struct['file_path']['seq_name'] = '2011_09_26_drive_0091_sync' + track_struct['file_path']['img_name'] = '2011_09_26_drive_0091_sync' + track_struct['file_path']['sub_seq_name'] = '' + track_struct['file_path']['det_path'] = 'D:/Data/KITTI/'+track_struct['file_path']['seq_name']+'/disturb_gt_2.txt' + track_struct['file_path']['img_folder'] = 'D:/Data/KITTI/'+track_struct['file_path']['img_name'] \ + +track_struct['file_path']['sub_seq_name']+'/image_02/data' + track_struct['file_path']['crop_det_folder'] = 'D:/Data/KITTI/temp_crop' + track_struct['file_path']['triplet_model'] = 'D:/Data/UA-Detrac/UA_Detrac_model/KITTI_model' + track_struct['file_path']['seq_model'] = 'D:/Data/UA-Detrac/KITTI_model/model.ckpt' + track_struct['file_path']['tracking_img_folder'] = 'D:/Data/KITTI/tracking_img/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name'] + track_struct['file_path']['tracking_video_path'] = 'D:/Data/KITTI/tracking_video/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name']+'.avi' + track_struct['file_path']['txt_result_path'] = 'D:/Data/KITTI/txt_result/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name']+'.txt' + track_struct['file_path']['track_struct_path'] = 'D:/Data/KITTI/track_struct/'+track_struct['file_path']['seq_name'] \ + +track_struct['file_path']['sub_seq_name']+'.obj' + + track_struct['track_params']['num_fr'] = 50 + track_struct['track_params']['num_track'] = 1000 + track_struct['track_params']['num_max_det'] = 10000 + 
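+    # Rough meaning of the window/gating parameters above and below, as used in
+    # TC_online: num_fr is the sliding-window length in frames, IOU_thresh the
+    # minimum overlap for frame-to-frame association, color_thresh the appearance
+    # gate on that association, and clustering_period how often (in frames) the
+    # tracklet clustering is re-run. These readings are inferred from usage in
+    # this file.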
track_struct['track_params']['max_num_obj'] = 10000 + track_struct['track_params']['IOU_thresh'] = 0.3 + track_struct['track_params']['color_thresh'] = 8 + track_struct['track_params']['det_thresh'] = -2 + track_struct['track_params']['linear_pred_thresh'] = 5 + track_struct['track_params']['t_dist_thresh'] = 15 + track_struct['track_params']['track_overlap_thresh'] = 0.1 + track_struct['track_params']['search_radius'] = 1 + track_struct['track_params']['const_fr_thresh'] = 1 + track_struct['track_params']['crop_size'] = 182 + track_struct['track_params']['loc_scales'] = [100,30,5,5] + track_struct['track_params']['clustering_period'] = 20 + track_struct['track_params']['time_cluster_dist'] = 100 + track_struct['track_params']['num_time_cluster'] \ + = int(np.ceil(track_struct['track_params']['num_fr']/track_struct['track_params']['time_cluster_dist'])) + + track_struct['track_params']['max_length'] = 64 + track_struct['track_params']['feature_size'] = 4+512 + track_struct['track_params']['batch_size'] = 64 + track_struct['track_params']['num_classes'] = 2 + + track_struct['tracklet_mat'] = {'track_id_mat':[], 'xmin_mat':[], 'ymin_mat':[], 'xmax_mat':[], 'ymax_mat':[], 'x_3d_mat':[], + 'y_3d_mat':[], 'w_3d_mat':[], 'h_3d_mat':[], 'det_score_mat':[], 'track_interval':[], + 'obj_id_mat':[], 'appearance_fea_mat':[]} + + track_struct['tracklet_mat']['track_id_mat'] = -np.ones(track_struct['track_params']['num_track'], dtype=int) + track_struct['tracklet_mat']['obj_id_mat'] = -np.ones(track_struct['track_params']['num_track'], dtype=int) + track_struct['tracklet_mat']['xmin_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymin_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['xmax_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['ymax_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['x_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['y_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['w_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['h_3d_mat'] = -np.ones((track_struct['track_params']['num_track'], + track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['det_score_mat'] = \ + -np.ones((track_struct['track_params']['num_track'],track_struct['track_params']['num_fr'])) + track_struct['tracklet_mat']['track_interval'] = -np.ones((track_struct['track_params']['num_track'],2), dtype=int) + track_struct['tracklet_mat']['prev_track_interval'] = -np.ones((track_struct['track_params']['num_track'],2), dtype=int) + track_struct['tracklet_mat']['appearance_fea_mat'] = -np.ones((track_struct['track_params']['num_max_det'], + track_struct['track_params']['feature_size']-4+2)) + + track_struct['tracklet_mat']['comb_track_cost'] = np.zeros((track_struct['track_params']['num_track'], + track_struct['track_params']['num_track'])) + track_struct['tracklet_mat']['comb_track_cost_mask'] = np.zeros((track_struct['track_params']['num_track'], + 
track_struct['track_params']['num_track']),dtype=int) + track_struct['tracklet_mat']['save_obj_id_mask'] = np.zeros(track_struct['track_params']['max_num_obj'],dtype=int) + track_struct['tracklet_mat']['assigned_obj_id_mask'] = np.zeros(track_struct['track_params']['max_num_obj'],dtype=int) + track_struct['tracklet_mat']['imgs'] = [] + track_struct['tracklet_mat']['color_table'] = track_lib.color_table(track_struct['track_params']['max_num_obj']) + + # remove folder + if os.path.isdir(track_struct['file_path']['crop_det_folder']): + shutil.rmtree(track_struct['file_path']['crop_det_folder']) + + return + +def TC_tracker(): + global track_struct + init_TC_tracker() + + # initialize triplet model + global triplet_graph + global triplet_sess + init_triplet_model() + + # initialize tracklet model + global tracklet_graph + global tracklet_sess + init_tracklet_model() + + M = track_lib.load_detection(track_struct['file_path']['det_path'], 'KITTI_3d') + total_num_fr = int(M[-1,0]-M[0,0]+1) + + t_pointer = 0 + for n in range(total_num_fr): + print("fr_idx %d" % n) + fr_idx = n + idx = np.where(np.logical_and(M[:,0]==fr_idx,M[:,5]>track_struct['track_params']['det_thresh']))[0] + if len(idx)>1: + choose_idx, _ = track_lib.merge_bbox(M[idx,1:5], 0.3, M[idx,5]) + #import pdb; pdb.set_trace() + temp_M = M[idx[choose_idx],:] + else: + temp_M = M[idx,:] + + img_name = track_lib.file_name(fr_idx,10)+'.png' + img_path = track_struct['file_path']['img_folder']+'/'+img_name + img = misc.imread(img_path) + + TC_online(temp_M, img, t_pointer, fr_idx) + t_pointer = t_pointer+1 + if t_pointer>track_struct['track_params']['num_fr']: + t_pointer = track_struct['track_params']['num_fr'] + + return track_struct diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/train_cnn_trajectory_2d.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/train_cnn_trajectory_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..669b67ec897755a4ba1599903f8dd3360790450f --- /dev/null +++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/train_cnn_trajectory_2d.py @@ -0,0 +1,927 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# /* +# * Copyright ©2019 Gaoang Wang. All rights reserved. Permission is +# * hereby granted for academic use. 
No other use, copying, distribution, or modification +# * is permitted without prior written consent. Copyrights for +# * third-party components of this work must be honored. Instructors +# * interested in reusing these course materials should contact the +# * author. +# */ +# +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +########################################## +import time +from npu_bridge.npu_init import * +from skimage.transform import resize +########################################## +import tensorflow as tf +import numpy as np +import argparse +import facenet +import re +import src.lfw +import os +import sys +from tensorflow.python.ops import data_flow_ops +from sklearn import metrics +from scipy.optimize import brentq +from scipy import interpolate +from scipy.interpolate import interp1d +from scipy.io import loadmat +from scipy import spatial +import matplotlib.pyplot as plt +import seq_nn_3d_v2 +import random +import math +import scipy +import shutil + + +# MAT_folder = 'C:/Users/tangz/OneDrive/Documents/Gaoang/MOT17/gt_mat' +# img_folder = 'C:/Users/tangz/OneDrive/Documents/Gaoang/MOT17/MOT17Det/train' +# temp_folder = 'C:/Users/tangz/OneDrive/Documents/Gaoang/MOT17/temp' +# triplet_model = 'C:/Users/tangz/OneDrive/Documents/Gaoang/MOT17/MOT_appearance' +# # save_dir = 'C:/Users/tangz/OneDrive/Documents/Gaoang/MOT17/MOT_2d/model.ckpt' +# ################################################################################# +# MAT_folder = '/home/ma-user/modelarts/inputs/data_url_0/mat' +# img_folder = '/home/ma-user/modelarts/inputs/data_url_0/train' +# temp_folder = '/home/ma-user/modelarts/outputs/train_url_0/temp' +# triplet_model = '/home/ma-user/modelarts/inputs/data_url_0/model/20211212-110741' +# save_dir = '/home/ma-user/modelarts/outputs/train_url_0/model/model.ckpt' +# ################################################################################# + +bbox_size = 182 +max_length = 64 +feature_size = 4+512 +batch_size = 32 +num_classes = 2 +margin = 0.15 + +# sample_prob = [0.0852,0.1996,0.2550,0.0313,0.0854,0.1546,0.1890] +########################################## +sample_prob = [600, 105, 837, 525, 654, 900, 750] +########################################## + +#sample_prob = np.ones(25) +#remove_file_idx = [7,23] +#sample_prob[remove_file_idx] = 0 +lr = 1e-3 + + +def main(args): + MAT_folder = args.MAT_folder + img_folder = args.img_folder + temp_folder = args.temp_folder + triplet_model = args.triplet_model + save_dir = args.save_dir + max_step = args.max_step + + # In[4]: + batch_X_x = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_y = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_w = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_h = tf.placeholder(tf.float32, [None, 1, max_length, 1]) + batch_X_a = tf.placeholder(tf.float32, [None, feature_size - 4, max_length, 1]) + batch_mask_1 = tf.placeholder(tf.float32, [None, 1, max_length, 2]) + batch_mask_2 = tf.placeholder(tf.float32, [None, feature_size - 4, max_length, 2]) + batch_Y = tf.placeholder(tf.int32, [None, num_classes]) + keep_prob = tf.placeholder(tf.float32) + + y_conv = seq_nn_3d_v2.seq_nn(batch_X_x, batch_X_y, batch_X_w, batch_X_h, batch_X_a, batch_mask_1, batch_mask_2, + batch_Y, max_length, feature_size, keep_prob) + + cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=batch_Y, logits=y_conv)) + train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy) + 
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(batch_Y, 1)) + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + + init = tf.global_variables_initializer() + saver = tf.train.Saver() + + ############################################### + config = tf.ConfigProto() + custom_op = config.graph_options.rewrite_options.custom_optimizers.add() + custom_op.name = "NpuOptimizer" + custom_op.parameter_map["mix_compile_mode"].b = True + config.graph_options.rewrite_options.remapping = RewriterConfig.OFF + config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + ############################################### + with tf.Session(config=npu_config_proto(config_proto=config)) as sess: + sess.run(init) + if os.path.isfile(save_dir + '.meta') == True: + saver.restore(sess, save_dir) + print("Model restored.") + + cnt = 0 + # + for i in range(2000000): + start_time = time.time() + total_batch_x, total_batch_y = generate_data(feature_size, max_length, batch_size * 10, MAT_folder, img_folder,triplet_model, temp_folder) + total_batch_x = interp_batch(total_batch_x) + + # delete temp folder + shutil.rmtree(temp_folder) + + remove_idx = [] + for k in range(len(total_batch_x)): + if np.sum(total_batch_x[k, 0, :, 1]) == 0: + remove_idx.append(k) + total_batch_x = np.delete(total_batch_x, np.array(remove_idx), axis=0) + total_batch_y = np.delete(total_batch_y, np.array(remove_idx), axis=0) + print(len(total_batch_y)) + + total_batch_x[:, 4:, :, 0] = 10 * total_batch_x[:, 4:, :, 0] + temp_X = np.copy(total_batch_x) + temp_Y = np.copy(total_batch_y) + idx = np.arange(total_batch_x.shape[0]) + np.random.shuffle(idx) + for k in range(len(idx)): + total_batch_x[idx[k], :, :, :] = temp_X[k, :, :, :] + total_batch_y[idx[k], :] = temp_Y[k, :] + num_batch = int(np.ceil(len(total_batch_y) / batch_size)) + + # shuffle 4 times + acc = [] + step_time = 0 + for kk in range(num_batch): + temp_batch_size = batch_size + if kk == num_batch - 1: + temp_batch_size = len(total_batch_y) - batch_size * (num_batch - 1) + + cnt = cnt + 1 + batch_x = total_batch_x[kk * batch_size:kk * batch_size + temp_batch_size, :, :, :] + batch_y = total_batch_y[kk * batch_size:kk * batch_size + temp_batch_size, :] + + x = np.zeros((temp_batch_size, 1, max_length, 1)) + y = np.zeros((temp_batch_size, 1, max_length, 1)) + w = np.zeros((temp_batch_size, 1, max_length, 1)) + h = np.zeros((temp_batch_size, 1, max_length, 1)) + ap = np.zeros((temp_batch_size, feature_size - 4, max_length, 1)) + mask_1 = np.zeros((temp_batch_size, 1, max_length, 2)) + mask_2 = np.zeros((temp_batch_size, feature_size - 4, max_length, 2)) + + x[:, 0, :, 0] = batch_x[:, 0, :, 0] + y[:, 0, :, 0] = batch_x[:, 1, :, 0] + w[:, 0, :, 0] = batch_x[:, 2, :, 0] + h[:, 0, :, 0] = batch_x[:, 3, :, 0] + + ap[:, :, :, 0] = batch_x[:, 4:, :, 0] + + mask_1[:, 0, :, :] = batch_x[:, 0, :, 1:] + mask_2[:, :, :, :] = batch_x[:, 4:, :, 1:] + + if cnt % 1 == 0: + + temp_c = 0 + while 1: + y_pred = sess.run(y_conv, feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: batch_y, + keep_prob: 1.0}) + wrong_idx = [] + for mm in range(len(y_pred)): + if (y_pred[mm, 0] > y_pred[mm, 1] and batch_y[mm, 0] == 0) or ( + y_pred[mm, 0] <= y_pred[mm, 1] and batch_y[mm, 0] == 1): + wrong_idx.append(mm) + + train_accuracy = (len(y_pred) - len(wrong_idx)) / len(y_pred) + if temp_c == 0: + acc.append(train_accuracy) + temp_c = temp_c + 1 + + #print(train_accuracy) + if 
train_accuracy > 0.9: + break + + train_step.run(feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: batch_y, + keep_prob: 0.75}) + #print('step %d, training accuracy %g' % (cnt, train_accuracy)) + step_time = time.time() - start_time + print("epoch : {}----step : {}----loss : {}----sec/step : {:.3f}".format(i, cnt, 1-train_accuracy,step_time)) + acc = np.array(acc) + print('accuracy : {}'.format(np.mean(acc))) + + if cnt % 100 == 0: + if not os.path.isdir(save_dir): + os.makedirs(save_dir) + save_path = saver.save(sess, save_dir) + print("Model saved in path: %s" % save_path) + + if cnt >= max_step and np.mean(acc)> 0.8 : + with open(os.path.join(args.output_path, "performance_precision.txt"), "w") as file_write: + write_str = "Final Accuracy accuracy : " + str(np.round(np.mean(acc), 4)) + print(str(write_str)) + file_write.write(write_str) + file_write.write('\r\n') + + write_str = "Final Performance ms/step : " + str(round(step_time * 1000, 4)) + print(str(write_str)) + file_write.write(write_str) + file_write.write('\r\n') + + write_str = "Final Training Duration sec : " + str(round(time.time() - start_time, 4)) + print(str(write_str)) + file_write.write(write_str) + file_write.write('\r\n') + break + + +# In[3]: +def draw_traj(x,mask_1): + fig, ax = plt.subplots() + ax.plot(x,color=[0.5,0.5,0.5],marker='o',linestyle='None') + t1 = np.where(mask_1[:,0]==1)[0] + t2 = np.where(mask_1[:,1]==1)[0] + ax.plot(t1,x[mask_1[:,0]==1],color=[0.2,0.6,0.86],marker='o',linestyle='None') + ax.plot(t2,x[mask_1[:,1]==1],color=[0.18,0.8,0.44],marker='o',linestyle='None') + ax.axhline(y=0, color='k') + ax.axvline(x=0, color='k') + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['left'].set_visible(False) + plt.xlim(0,64) + y_range = np.max(x)-np.min(x) + plt.ylim(np.min(x)-y_range/50, np.max(x)+y_range/20) + plt.show() + + +def draw_fea_map(x): + fig, ax = plt.subplots() + ax.imshow(np.power(np.transpose(x),0.2),cmap='gray') + ax.set_aspect(0.1) + plt.axis('off') + #plt.xticks(range(8)) + plt.show() + + +def interp_batch(total_batch_x): + interp_batch_x = total_batch_x.copy() + N_batch = total_batch_x.shape[0] + for n in range(N_batch): + temp_idx = np.where(total_batch_x[n,0,:,1]==1)[0] + t1 = int(temp_idx[-1]) + temp_idx = np.where(total_batch_x[n,0,:,2]==1)[0] + t2 = int(temp_idx[0]) + if t2-t1<=1: + continue + interp_t = np.array(range(t1+1,t2)) + for k in range(total_batch_x.shape[1]): + #temp_std = np.std(total_batch_x[n,k,total_batch_x[n,k,:,0]!=0,0]) + temp_std1 = np.std(total_batch_x[n,k,total_batch_x[n,0,:,1]!=0,0]) + temp_std2 = np.std(total_batch_x[n,k,total_batch_x[n,0,:,2]!=0,0]) + x_p = [t1,t2] + f_p = [total_batch_x[n,k,t1,0],total_batch_x[n,k,t2,0]] + #************************************* + #interp_batch_x[n,k,t1+1:t2,0] = np.interp(interp_t,x_p,f_p)+np.random.normal(0, temp_std, t2-t1-1) + #************************************* + interp_batch_x[n,k,t1+1:t2,0] = np.interp(interp_t,x_p,f_p)+np.random.normal(0, (temp_std1+temp_std2)*0.5, t2-t1-1) + return interp_batch_x + + +def num_str(num, length): + cnt = 1 + temp = num + while 1: + temp = int(temp/10) + if temp>0: + cnt = cnt+1 + else: + break + num_len = cnt + for n in range(length-num_len): + if n==0: + out_str = '0' + else: + out_str = out_str+'0' + if length-num_len>0: + return out_str+str(num) + else: + return str(num) + + +def file_name(num, length): + 
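+    # Zero-pads `num` to `length` digits, e.g. file_name(7, 4) -> '0007';
+    # functionally a duplicate of num_str above.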
cnt = 1
+    temp = num
+    while 1:
+        temp = int(temp/10)
+        if temp>0:
+            cnt = cnt+1
+        else:
+            break
+    num_len = cnt
+    for n in range(length-num_len):
+        if n==0:
+            out_str = '0'
+        else:
+            out_str = out_str+'0'
+    if length-num_len>0:
+        return out_str+str(num)
+    else:
+        return str(num)
+
+
+def evaluate(sess, iterator, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder,
+             embeddings, labels, image_paths, batch_size, distance_metric):
+    # Run forward pass to calculate embeddings
+    #print('Running forward pass on LFW images')
+
+    use_flipped_images = False
+    use_fixed_image_standardization = False
+    use_random_rotate = False
+    use_random_crop = False
+    # Enqueue one epoch of image paths and labels
+    nrof_embeddings = len(image_paths)  # nrof_pairs * nrof_images_per_pair
+    nrof_flips = 2 if use_flipped_images else 1
+    nrof_images = nrof_embeddings * nrof_flips
+    labels_array = np.expand_dims(np.arange(0,nrof_images),1)
+    image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1)
+    control_array = np.zeros_like(labels_array, np.int32)
+
+    if use_fixed_image_standardization:
+        control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION
+    if use_flipped_images:
+        # Flip every second image
+        control_array += (labels_array % 2)*facenet.FLIP
+    if use_random_rotate:
+        control_array += facenet.RANDOM_ROTATE
+    if use_random_crop:
+        control_array += facenet.RANDOM_CROP
+
+    sess.run(iterator.initializer, {batch_size_placeholder:batch_size, image_paths_placeholder: image_paths_array,
+                                    labels_placeholder: labels_array, control_placeholder: control_array})
+
+    embedding_size = int(embeddings.get_shape()[1])
+    assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size'
+    nrof_batches = nrof_images // batch_size
+    emb_array = np.zeros((nrof_images, embedding_size))
+    lab_array = np.zeros((nrof_images,))
+    for i in range(nrof_batches):
+        ###################################################################################
+        # feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size}
+        # emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict)
+        ###################################################################################
+        emb, lab = sess.run([embeddings, labels], feed_dict={phase_train_placeholder: False})
+        lab_array[lab] = lab
+        emb_array[lab, :] = emb
+        if i % 10 == 9:
+            print('.', end='')
+            sys.stdout.flush()
+    #import pdb; pdb.set_trace()
+    #np.savetxt("emb_array.csv", emb_array, delimiter=",")
+    return emb_array
+
+
+def split_track(X_2d,Y_2d,W_2d,H_2d,V_2d,img_size,obj_id,noise_scale,connect_thresh):
+
+    err_flag = 0
+    part_W_mat = W_2d[:,obj_id]
+    #import pdb; pdb.set_trace()
+    non_zero_idx = np.where(part_W_mat>0)[0]
+    if len(non_zero_idx)<=1 or np.max(non_zero_idx)-np.min(non_zero_idx)+1!=len(non_zero_idx):
+        err_flag = 1
+        return [], [], err_flag
+
+    st_fr = np.min(non_zero_idx)
+    end_fr = np.max(non_zero_idx)
+
+    bbox_tracklet = []
+    bbox_num = []
+
+    v_flag = 1
+    rand_num = random.uniform(0.0,1.0)
+    if rand_num<0.5 or len(V_2d)==0:  # use the visibility channel for only half of the samples
+        v_flag = 0
+
+    #v_flag = 0
+
+    for k in range(st_fr, end_fr+1):
+        rand_num = np.zeros((1,4))
+        for kk in range(4):
+            while 1:
+                rand_num[0,kk] = np.random.normal(0,0.05,size=1)[0]
+                if abs(rand_num[0,kk])<noise_scale:
+                    break
+    # ---- NOTE: the original block from here to the candidate-pair search below
+    # was corrupted in this diff. From what survives, split_track() goes on to
+    # jitter each gt box by rand_num, cut the trajectory into tracklets wherever
+    # a random draw exceeds connect_thresh (or the box is occluded when v_flag is
+    # set), and return (bbox_tracklet, t_interval, err_flag); the header of the
+    # 2d generate_data() is lost with it. The comparison above and the loop
+    # headers and condition below are reconstructed to fit the surviving
+    # fragments. ----
+            cand_pairs = []
+            for k1 in range(len(bbox_tracklet)-1):
+                for k2 in range(k1+1,len(bbox_tracklet)):
+                    if t_interval[k1,1]<t_interval[k2,0] and t_interval[k1,0]+max_length>t_interval[k2,0]:
+                        t_dist = t_interval[k2,0]-t_interval[k1,1]
+                        cand_pairs.append([k1,k2,t_dist])
+            if len(cand_pairs)==0:
+                continue
+
+            cand_pairs = np.array(cand_pairs)
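+            # Pair selection below: with probability 0.7 the (k1,k2) pair is
+            # drawn with weight exp(-t_dist^2/100), biasing training toward small
+            # temporal gaps; otherwise it is drawn uniformly.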
rand_num = np.random.rand(1)[0] + #print(rand_num) + if rand_num<0.7: + select_p = np.exp(-np.power(cand_pairs[:,2],2)/100) + select_p = select_p/sum(select_p) + #print(select_p) + pair_idx = np.random.choice(len(cand_pairs), size=1, p=select_p)[0] + else: + pair_idx = np.random.randint(len(cand_pairs), size=1)[0] + select_pair = cand_pairs[pair_idx] + select_pair = select_pair.astype(int) + + abs_fr_t1 = int(t_interval[select_pair[0],0]) + abs_fr_t2 = int(t_interval[select_pair[0],1]) + abs_fr_t3 = int(t_interval[select_pair[1],0]) + abs_fr_t4 = int(min(abs_fr_t1+max_length-1,t_interval[select_pair[1],1])) + + t1 = 0 + t2 = abs_fr_t2-abs_fr_t1 + t3 = abs_fr_t3-abs_fr_t1 + t4 = abs_fr_t4-abs_fr_t1 + + # mask + X[n,:,t1:t2+1,1] = 1 + X[n,:,t3:t4+1,2] = 1 + #X[n,4:,t1:t2+1,1] = bbox_tracklet[select_pair[0]][:,5] + #X[n,4:,t3:t4+1,2] = bbox_tracklet[select_pair[1]][0:t4-t3+1,5] + #import pdb; pdb.set_trace() + + # X + X[n,0,t1:t2+1,0] = 0.5*(bbox_tracklet[select_pair[0]][:,1]+bbox_tracklet[select_pair[0]][:,3])/img_size[0] + X[n,0,t3:t4+1,0] = 0.5*(bbox_tracklet[select_pair[1]][0:t4-t3+1,1]+bbox_tracklet[select_pair[1]][0:t4-t3+1,3])/img_size[0] + + # Y + X[n,1,t1:t2+1,0] = 0.5*(bbox_tracklet[select_pair[0]][:,2]+bbox_tracklet[select_pair[0]][:,4])/img_size[1] + X[n,1,t3:t4+1,0] = 0.5*(bbox_tracklet[select_pair[1]][0:t4-t3+1,2]+bbox_tracklet[select_pair[1]][0:t4-t3+1,4])/img_size[1] + + # W + X[n,2,t1:t2+1,0] = (bbox_tracklet[select_pair[0]][:,3]-bbox_tracklet[select_pair[0]][:,1])/img_size[0] + X[n,2,t3:t4+1,0] = (bbox_tracklet[select_pair[1]][0:t4-t3+1,3]-bbox_tracklet[select_pair[1]][0:t4-t3+1,1])/img_size[0] + + # H + X[n,3,t1:t2+1,0] = (bbox_tracklet[select_pair[0]][:,4]-bbox_tracklet[select_pair[0]][:,2])/img_size[1] + X[n,3,t3:t4+1,0] = (bbox_tracklet[select_pair[1]][0:t4-t3+1,4]-bbox_tracklet[select_pair[1]][0:t4-t3+1,2])/img_size[1] + ''' + plt.plot(X[n,0,:,0], 'ro') + plt.show() + plt.plot(X[n,1,:,0], 'ro') + plt.show() + plt.plot(X[n,2,:,0], 'ro') + plt.show() + plt.plot(X[n,3,:,0], 'ro') + plt.show() + plt.plot(X[n,0,:,1], 'ro') + plt.show() + plt.plot(X[n,0,:,2], 'ro') + plt.show() + import pdb; pdb.set_trace() + ''' + + # save all bbox + temp_crop_bbox = np.concatenate((bbox_tracklet[select_pair[0]],bbox_tracklet[select_pair[1]][0:t4-t3+1,:]), axis=0) + temp_crop_bbox = temp_crop_bbox.astype(int) + crop_bbox.append(temp_crop_bbox) + break + + #import pdb; pdb.set_trace() + + # negative + for n in range(int(batch_size/2),batch_size): + fr_num = Mat_files[n]['gtInfo'][0][0][0].shape[0] + id_num = Mat_files[n]['gtInfo'][0][0][0].shape[1] + Y[n,1] = 1 + + X_2d = Mat_files[n]['gtInfo'][0][0][0] + Y_2d = Mat_files[n]['gtInfo'][0][0][1] + W_2d = Mat_files[n]['gtInfo'][0][0][2] + H_2d = Mat_files[n]['gtInfo'][0][0][3] + + ######################################### + X_2d = X_2d-margin*W_2d + Y_2d = Y_2d-margin*H_2d + W_2d = (1+2*margin)*W_2d + H_2d = (1+2*margin)*H_2d + ########################################## + + if len(Mat_files[n]['gtInfo'][0][0])<=4: + V_2d = [] + else: + V_2d = Mat_files[n]['gtInfo'][0][0][4] + if len(Mat_files[n]['gtInfo'][0][0])==6: + img_size = Mat_files[n]['gtInfo'][0][0][5][0] + # temp_size = re.findall(r"\d+\.?\d*", img_size) + # img_size = [int(temp_size[0]), int(temp_size[1])] + else: + img_size = [1920,1080] + #V_2d = [] + #img_size = [1920,1080] + + # check candidate obj pairs + #pair_mat = np.zeros((id_num,id_num)) + cand_idx_pairs = [] + for n1 in range(id_num-1): + for n2 in range(n1+1,id_num): + cand_fr1 = np.where(W_2d[:,n1]>0)[0] + cand_fr2 = 
np.where(W_2d[:,n2]>0)[0]
+            if max(cand_fr1[0],cand_fr2[0])<min(cand_fr1[-1],cand_fr2[-1]):
+                cand_idx_pairs.append([n1,n2])
+
+        # ---- NOTE: the original block from here to the W-column lookups below was
+        # corrupted in this diff. From what survives, it enters the retry loop,
+        # samples a candidate pair (obj_id1, obj_id2) from cand_idx_pairs, and
+        # reads part_W_mat1 = W_2d[:,obj_id1]; the overlap test above and the next
+        # line are reconstructed to fit the surviving fragments. ----
+        non_zero_idx1 = np.where(part_W_mat1>0)[0]
+        part_W_mat2 = W_2d[:,obj_id2]
+        non_zero_idx2 = np.where(part_W_mat2>0)[0]
+        # skip pairs whose tracks are more than max_length frames apart
+        # (the middle of this condition is reconstructed)
+        if len(non_zero_idx1)==0 or len(non_zero_idx2)==0 or \
+            max(non_zero_idx1)+max_length<min(non_zero_idx2) or min(non_zero_idx1)-max_length>max(non_zero_idx2):
+            continue
+
+        bbox_tracklet1, t_interval1, err_flag = split_track(X_2d,Y_2d,W_2d,H_2d,V_2d,img_size,obj_id1,noise_scale,connect_thresh)
+        if err_flag==1:
+            continue
+        bbox_tracklet2, t_interval2, err_flag = split_track(X_2d,Y_2d,W_2d,H_2d,V_2d,img_size,obj_id2,noise_scale,connect_thresh)
+        if err_flag==1:
+            continue
+
+        cand_pairs = []
+        if len(bbox_tracklet1)<=1 or len(bbox_tracklet2)<=1:
+            continue
+        for k1 in range(len(bbox_tracklet1)):
+            for k2 in range(len(bbox_tracklet2)):
+                if t_interval1[k1,0]+max_length>t_interval2[k2,0] and t_interval1[k1,1]<t_interval2[k2,0]:
+                    cand_pairs.append([k1,k2])
+    # ---- NOTE: the original block from here to the embedding-consistency check
+    # below was corrupted in this diff. From what survives, it selects one
+    # tracklet pair, fills the negative sample X[n,...] exactly as in the
+    # positive case, records the crop boxes, and then (for every sample) crops
+    # the images, runs the facenet triplet model over the crops, and produces
+    # emb_array with one 512-d embedding per observed frame. ----
+    if X[n,0,X[n,0,:,1]+X[n,0,:,2]>0.5,0].shape[0]!=emb_array.shape[0]:
+        aa = 0
+        import pdb; pdb.set_trace()
+
+    X[n,4:,X[n,0,:,1]+X[n,0,:,2]>0.5,0] = emb_array
+
+    #import pdb; pdb.set_trace()
+    return X, Y
+
+
+def parse_arguments(argv):
+    parser = argparse.ArgumentParser()
+    # Default paths below follow the ModelArts layout used in training:
+    # MAT_folder = '/home/ma-user/modelarts/inputs/data_url_0/mat'
+    # img_folder = '/home/ma-user/modelarts/inputs/data_url_0/train'
+    # temp_folder = '/home/ma-user/modelarts/outputs/train_url_0/temp'
+    # triplet_model = '/home/ma-user/modelarts/inputs/data_url_0/model/20211212-110741'
+    # save_dir = '/home/ma-user/modelarts/outputs/train_url_0/model/model.ckpt'
+    parser.add_argument('--MAT_folder', type=str,
+                        help='Directory with the ground-truth .mat annotations.', default='/home/ma-user/modelarts/inputs/data_url_0/original_data/MOT17Det/mat/')
+    parser.add_argument('--img_folder', type=str,
+                        help='Directory with the training images.', default='/home/ma-user/modelarts/inputs/data_url_0/original_data/MOT17Det/train/')
+    parser.add_argument('--temp_folder', type=str,
+                        default='/home/ma-user/modelarts/outputs/train_url_0/temp')
+    parser.add_argument('--triplet_model', type=str,
+                        default='/home/ma-user/modelarts/outputs/train_url_0/model_data/20211209-124102/')
+    parser.add_argument('--max_step', type=int, default=20000000)
+    parser.add_argument('--save_dir', type=str,
+                        default='/home/ma-user/modelarts/outputs/train_url_0/models/result/model.ckpt')
+    parser.add_argument('--output_path', type=str,
+                        default='/home/ma-user/modelarts/outputs/train_url_0/logs/')
+    return parser.parse_args(argv)
+
+
+if __name__ == '__main__':
+    main(parse_arguments(sys.argv[1:]))
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/train_cnn_trajectory_3d.py b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/train_cnn_trajectory_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f7902a30d4d5e394f36660f4cf9e75d0e06a810
--- /dev/null
+++ b/TensorFlow/contrib/cv/TNT_ID1233_for_TensorFlow/train_cnn_trajectory_3d.py
@@ -0,0 +1,549 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# /*
+#  * Copyright ©2019 Gaoang Wang. All rights reserved. Permission is
+#  * hereby granted for academic use. No other use, copying, distribution, or
+#  * modification is permitted without prior written consent. Copyrights for
+#  * third-party components of this work must be honored. Instructors
+#  * interested in reusing these course materials should contact the
+#  * author.
+#  */
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+import facenet
+import lfw
+import os
+import sys
+from tensorflow.python.ops import data_flow_ops
+from sklearn import metrics
+from scipy.optimize import brentq
+from scipy import interpolate
+from scipy.interpolate import interp1d
+from scipy.io import loadmat
+import matplotlib.pyplot as plt
+import seq_nn_3d
+
+# In[2]:
+
+MAT_folder = 'D:/KITTI/raw_data/tracking_annotation'
+data_folder = 'D:/KITTI/raw_data/2011_09_26'
+img_folder = 'D:/KITTI/raw_data/KITTI_crop_all'
+triplet_model = 'D:/KITTI/raw_data/pre_model2'
+max_length = 64
+feature_size = 4+512
+batch_size = 32
+num_classes = 2
+loc_scales = [100,30,5,5]
+img_size = [1242,375]
+noise_scales = [0.005,0.005,0.005,0.005]
+
+# In[3]:
+def num_str(num, length):
+    # zero-pad `num` to `length` digits, e.g. num_str(7, 4) -> '0007'
+    cnt = 1
+    temp = num
+    while 1:
+        temp = int(temp/10)
+        if temp>0:
+            cnt = cnt+1
+        else:
+            break
+    num_len = cnt
+    for n in range(length-num_len):
+        if n==0:
+            out_str = '0'
+        else:
+            out_str = out_str+'0'
+    if length-num_len>0:
+        return out_str+str(num)
+    else:
+        return str(num)
+
+
+def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder,
+             embeddings, labels, image_paths, batch_size, distance_metric):
+    # Run forward pass to calculate embeddings
+    #print('Running forward pass on LFW images')
+
+    use_flipped_images = False
+    use_fixed_image_standardization = False
+    use_random_rotate = True
+    use_random_crop = True
+    # Enqueue one epoch of image paths and labels
+    nrof_embeddings = len(image_paths)  # nrof_pairs * nrof_images_per_pair
+    nrof_flips = 2 if use_flipped_images else 1
+    nrof_images = nrof_embeddings * nrof_flips
+    labels_array = np.expand_dims(np.arange(0,nrof_images),1)
+    image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1)
+    control_array = np.zeros_like(labels_array, np.int32)
+
+    if use_fixed_image_standardization:
+        control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION
+    if use_flipped_images:
+        # Flip every second image
+        control_array += (labels_array % 2)*facenet.FLIP
+    if use_random_rotate:
+        control_array += facenet.RANDOM_ROTATE
+    if use_random_crop:
+        control_array += facenet.RANDOM_CROP
+
+    sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, 
control_placeholder: control_array}) + + embedding_size = int(embeddings.get_shape()[1]) + assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' + nrof_batches = nrof_images // batch_size + emb_array = np.zeros((nrof_images, embedding_size)) + lab_array = np.zeros((nrof_images,)) + for i in range(nrof_batches): + feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size} + emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict) + lab_array[lab] = lab + emb_array[lab, :] = emb + if i % 10 == 9: + print('.', end='') + sys.stdout.flush() + #import pdb; pdb.set_trace() + #np.savetxt("emb_array.csv", emb_array, delimiter=",") + return emb_array + +def generate_data(feature_size, max_length, batch_size, MAT_folder, img_folder): + + # load mat files + Mat_paths = os.listdir(MAT_folder) + choose_idx = np.random.randint(len(Mat_paths), size=batch_size) + Mat_files = [] + seq_names = [] + for n in range(batch_size): + seq_name = Mat_paths[choose_idx[n]][0:21]+'_sync' + temp_path = data_folder+'/'+seq_name+'/gt_2.mat' + temp_mat_file = loadmat(temp_path) + Mat_files.append(temp_mat_file) + seq_names.append(seq_name) + + X = np.zeros((batch_size,feature_size,max_length,3)) + Y = np.zeros((batch_size,2)) + all_paths = [] + + + # positive + for n in range(int(batch_size/2)): + seq_name = seq_names[n] + fr_num = Mat_files[n]['gtInfo'][0][0][0].shape[0] + id_num = Mat_files[n]['gtInfo'][0][0][0].shape[1] + Y[n,0] = 1 + + X_3d = Mat_files[n]['gtInfo'][0][0][4] + Y_3d = Mat_files[n]['gtInfo'][0][0][5] + W_3d = Mat_files[n]['gtInfo'][0][0][6] + H_3d = Mat_files[n]['gtInfo'][0][0][7] + try_time = 0 + if try_time>=10: + continue + while 1: + if try_time>=10: + all_paths.append([]) + #print('err') + break + obj_id = np.random.randint(id_num, size=1)[0] + part_W_mat = Mat_files[n]['gtInfo'][0][0][3][:,obj_id] + + non_zero_idx = np.where(part_W_mat>0)[0] + if np.max(non_zero_idx)-np.min(non_zero_idx)+1!=len(non_zero_idx) or len(non_zero_idx)<=1: + try_time = try_time+1 + continue + st_fr = np.min(non_zero_idx)#+np.random.randint(len(non_zero_idx)-1, size=1)[0] + end_fr = np.max(non_zero_idx) + abs_fr_t1 = int(st_fr+np.random.randint(len(non_zero_idx)-1, size=1)[0]) + abs_end_fr = min(abs_fr_t1+max_length-1,end_fr) + abs_fr_t4 = int(abs_end_fr-np.random.randint(abs_end_fr-abs_fr_t1, size=1)[0]) + abs_fr_t2 = int(abs_fr_t1+np.random.randint(abs_fr_t4-abs_fr_t1, size=1)[0]) + abs_fr_t3 = int(abs_fr_t4-np.random.randint(abs_fr_t4-abs_fr_t2, size=1)[0]) + + t1 = 0 + t2 = abs_fr_t2-abs_fr_t1 + t3 = abs_fr_t3-abs_fr_t1 + t4 = abs_fr_t4-abs_fr_t1 + + # mask + X[n,:,t1:t2+1,1] = 1 + X[n,:,t3:t4+1,2] = 1 + + # X + X[n,0,t1:t2+1,0] = X_3d[abs_fr_t1:abs_fr_t2+1,obj_id]/loc_scales[0]+noise_scales[0]*np.random.normal(0,1,t2-t1+1) + X[n,0,t3:t4+1,0] = X_3d[abs_fr_t3:abs_fr_t4+1,obj_id]/loc_scales[0]+noise_scales[0]*np.random.normal(0,1,t4-t3+1) + + # Y + X[n,1,t1:t2+1,0] = Y_3d[abs_fr_t1:abs_fr_t2+1,obj_id]/loc_scales[1]+noise_scales[1]*np.random.normal(0,1,t2-t1+1) + X[n,1,t3:t4+1,0] = Y_3d[abs_fr_t3:abs_fr_t4+1,obj_id]/loc_scales[1]+noise_scales[1]*np.random.normal(0,1,t4-t3+1) + + # W + X[n,2,t1:t2+1,0] = W_3d[abs_fr_t1:abs_fr_t2+1,obj_id]/loc_scales[2]+noise_scales[2]*np.random.normal(0,1,t2-t1+1) + X[n,2,t3:t4+1,0] = W_3d[abs_fr_t3:abs_fr_t4+1,obj_id]/loc_scales[2]+noise_scales[2]*np.random.normal(0,1,t4-t3+1) + + # H + X[n,3,t1:t2+1,0] = H_3d[abs_fr_t1:abs_fr_t2+1,obj_id]/loc_scales[3]+noise_scales[3]*np.random.normal(0,1,t2-t1+1) + 
X[n,3,t3:t4+1,0] = H_3d[abs_fr_t3:abs_fr_t4+1,obj_id]/loc_scales[3]+noise_scales[3]*np.random.normal(0,1,t4-t3+1)
+            '''
+            plt.plot(X[n,0,:,0], 'ro')
+            plt.show()
+            plt.plot(X[n,1,:,0], 'ro')
+            plt.show()
+            plt.plot(X[n,2,:,0], 'ro')
+            plt.show()
+            plt.plot(X[n,3,:,0], 'ro')
+            plt.show()
+            plt.plot(X[n,0,:,1], 'ro')
+            plt.show()
+            plt.plot(X[n,0,:,2], 'ro')
+            plt.show()
+            import pdb; pdb.set_trace()
+            '''
+            temp_paths = []
+            for k in range(abs_fr_t1,abs_fr_t2+1):
+                class_name = seq_name+'_image_02_'+num_str(obj_id+1,4)
+                file_name = class_name+'_'+num_str(k+1,4)+'.png'
+                temp_path = img_folder+'/'+class_name+'/'+file_name
+                temp_paths.append(temp_path)
+            for k in range(abs_fr_t3,abs_fr_t4+1):
+                class_name = seq_name+'_image_02_'+num_str(obj_id+1,4)
+                file_name = class_name+'_'+num_str(k+1,4)+'.png'
+                temp_path = img_folder+'/'+class_name+'/'+file_name
+                temp_paths.append(temp_path)
+            all_paths.append(temp_paths.copy())
+            break
+
+
+    # negative
+    for n in range(int(batch_size/2),batch_size):
+        Y[n,1] = 1
+        seq_name = seq_names[n]
+        fr_num = Mat_files[n]['gtInfo'][0][0][0].shape[0]
+        id_num = Mat_files[n]['gtInfo'][0][0][0].shape[1]
+
+        X_3d = Mat_files[n]['gtInfo'][0][0][4]
+        Y_3d = Mat_files[n]['gtInfo'][0][0][5]
+        W_3d = Mat_files[n]['gtInfo'][0][0][6]
+        H_3d = Mat_files[n]['gtInfo'][0][0][7]
+        try_time = 0
+
+        time_interval = np.zeros((id_num,2))
+        for obj_id in range(id_num):
+            part_W_mat = Mat_files[n]['gtInfo'][0][0][3][:,obj_id]
+            non_zero_idx = np.where(part_W_mat>0)[0]
+            t_min = np.min(non_zero_idx)
+            t_max = np.max(non_zero_idx)
+            if len(non_zero_idx)!=t_max-t_min+1:
+                time_interval[obj_id,0] = -1
+                time_interval[obj_id,1] = -1
+            else:
+                time_interval[obj_id,0] = t_min
+                time_interval[obj_id,1] = t_max
+
+        if try_time>=10:
+            continue
+        while 1:
+            if try_time>=10:
+                all_paths.append([])
+                break
+            split_fr = 1+np.random.randint(fr_num-2, size=1)[0]
+
+            cand_pairs = []
+            for id1 in range(id_num):
+                for id2 in range(id_num):
+                    if id1==id2:
+                        continue
+                    if time_interval[id1,0]==-1 or time_interval[id2,0]==-1:
+                        continue
+                    if time_interval[id1,0]<=split_fr and time_interval[id2,1]>split_fr:
+                        t_above = min(split_fr,time_interval[id1,1])
+                        t_below = max(split_fr+1,time_interval[id2,0])
+                        t_dist = t_below-t_above
+    # ---- NOTE: the original block from here to the embedding-consistency check
+    # below was corrupted in this diff. From what survives, it keeps (id1,id2)
+    # pairs whose temporal gap t_dist is small enough, picks one pair to build
+    # the negative sample X[n,...] across split_fr, collects the corresponding
+    # crop paths, and finally embeds all crops with the facenet triplet model to
+    # obtain emb_array (one 512-d vector per observed frame). ----
+    if X[n,0,X[n,0,:,1]+X[n,0,:,2]>0.5,0].shape[0]!=emb_array.shape[0]:
+        aa = 0
+        import pdb; pdb.set_trace()
+
+    #import pdb; pdb.set_trace()
+    X[n,4:,X[n,0,:,1]+X[n,0,:,2]>0.5,0] = emb_array
+
+    #import pdb; pdb.set_trace()
+    return X, Y
+
+# In[4]:
+batch_X_x = tf.placeholder(tf.float32, [None, 1, max_length, 1])
+batch_X_y = tf.placeholder(tf.float32, [None, 1, max_length, 1])
+batch_X_w = tf.placeholder(tf.float32, [None, 1, max_length, 1])
+batch_X_h = tf.placeholder(tf.float32, [None, 1, max_length, 1])
+batch_X_a = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 1])
+batch_mask_1 = tf.placeholder(tf.float32, [None, 1, max_length, 2])
+batch_mask_2 = tf.placeholder(tf.float32, [None, feature_size-4, max_length, 2])
+batch_Y = tf.placeholder(tf.int32, [None, num_classes])
+keep_prob = tf.placeholder(tf.float32)
+
+y_conv = seq_nn_3d.seq_nn(batch_X_x,batch_X_y,batch_X_w,batch_X_h,batch_X_a,batch_mask_1,batch_mask_2,batch_Y,max_length,feature_size,keep_prob)
+
+cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=batch_Y, logits=y_conv))
+train_step = tf.train.AdamOptimizer(3e-5).minimize(cross_entropy)
+correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(batch_Y, 1))
+accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+
+init = 
tf.global_variables_initializer() +saver = tf.train.Saver() + +with tf.Session() as sess: + sess.run(init) + + saver.restore(sess, "C:/Users/tangz/OneDrive/Documents/Gaoang/RNN/KITTI_model/model.ckpt") + print("Model restored.") + + cnt = 0 + for i in range(2000000): + total_batch_x, total_batch_y = generate_data(feature_size, max_length, batch_size*10, MAT_folder, img_folder) + remove_idx = [] + for k in range(len(total_batch_x)): + if np.sum(total_batch_x[k,0,:,1])==0: + remove_idx.append(k) + total_batch_x = np.delete(total_batch_x, np.array(remove_idx), axis=0) + total_batch_y = np.delete(total_batch_y, np.array(remove_idx), axis=0) + print(len(total_batch_y)) + + total_batch_x[:,4:,:,0] = 10*total_batch_x[:,4:,:,0] + temp_X = np.copy(total_batch_x) + temp_Y = np.copy(total_batch_y) + idx = np.arange(total_batch_x.shape[0]) + np.random.shuffle(idx) + for k in range(len(idx)): + total_batch_x[idx[k],:,:,:] = temp_X[k,:,:,:] + total_batch_y[idx[k],:] = temp_Y[k,:] + num_batch = int(np.ceil(len(total_batch_y)/batch_size)) + + # shuffle 4 times + for kk in range(num_batch): + temp_batch_size = batch_size + if kk==num_batch-1: + temp_batch_size = len(total_batch_y)-batch_size*(num_batch-1) + + cnt = cnt+1 + batch_x = total_batch_x[kk*batch_size:kk*batch_size+temp_batch_size,:,:,:] + batch_y = total_batch_y[kk*batch_size:kk*batch_size+temp_batch_size,:] + + x = np.zeros((temp_batch_size,1,max_length,1)) + y = np.zeros((temp_batch_size,1,max_length,1)) + w = np.zeros((temp_batch_size,1,max_length,1)) + h = np.zeros((temp_batch_size,1,max_length,1)) + ap = np.zeros((temp_batch_size,feature_size-4,max_length,1)) + mask_1 = np.zeros((temp_batch_size,1,max_length,2)) + mask_2 = np.zeros((temp_batch_size,feature_size-4,max_length,2)) + x[:,0,:,0] = batch_x[:,0,:,0] + y[:,0,:,0] = batch_x[:,1,:,0] + w[:,0,:,0] = batch_x[:,2,:,0] + h[:,0,:,0] = batch_x[:,3,:,0] + ap[:,:,:,0] = batch_x[:,4:,:,0] + mask_1[:,0,:,:] = batch_x[:,0,:,1:] + mask_2[:,:,:,:] = batch_x[:,4:,:,1:] + if cnt % 1 == 0: + ''' + y_pred = sess.run(y_conv,feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: batch_y, + keep_prob: 1.0}) + #import pdb; pdb.set_trace() + ''' + train_accuracy = accuracy.eval(feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: batch_y, + keep_prob: 1.0}) + print('step %d, training accuracy %g' % (cnt, train_accuracy)) + ''' + for n in range(10): + shuffle_x = np.copy(batch_x) + shuffle_y = np.copy(batch_y) + + if n!=0: + shuffle_x2 = np.copy(shuffle_x) + shuffle_y2 = np.copy(shuffle_y) + idx = np.array(range(4,feature_size)) + np.random.shuffle(idx) + for k in range(len(idx)): + shuffle_x[:,idx[k],:,:] = shuffle_x2[:,k+4,:,:] + ''' + + #import pdb; pdb.set_trace() + train_step.run(feed_dict={batch_X_x: x, + batch_X_y: y, + batch_X_w: w, + batch_X_h: h, + batch_X_a: ap, + batch_mask_1: mask_1, + batch_mask_2: mask_2, + batch_Y: batch_y, + keep_prob: 0.75}) + + + if cnt % 100 == 0: + save_path = saver.save(sess, 'C:/Users/tangz/OneDrive/Documents/Gaoang/RNN/KITTI_model/model.ckpt') + print("Model saved in path: %s" % save_path) +
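+# Unlike train_cnn_trajectory_2d.py above, this 3d variant keeps the original
+# research setup: hard-coded Windows paths, a fixed 3e-5 Adam learning rate, and
+# a plain tf.Session without the NPU config, so the paths must be edited in
+# place before running, e.g.
+#   python train_cnn_trajectory_3d.py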