function X = integratedArmPerceptionMain(images, points, depthVector, saveDataDirectory, regionHull, imageWindow, graspType, verbose, smoothFeatures, medianFilter)
%INTEGRATEDARMPERCEPTIONMAIN Predict 3-D grasp ("handle") points from an image plus depth data.
%
%   X = integratedArmPerceptionMain(images, points, depthVector, saveDataDirectory)
%   X = integratedArmPerceptionMain(..., regionHull, imageWindow, graspType, ...
%                                   verbose, smoothFeatures, medianFilter)
%
%   The 4 required inputs are:
%     images            - the image to be predicted on. Must be 640 x 480
%                         (i.e. 480 rows by 640 columns, see imrows/imcols/gridSize).
%                         NOTE(review): the resize loop below overwrites "images"
%                         with a single 3-D image, so only one picture
%                         (size(images,4) == 1) appears to be supported.
%     points            - the list of points in 3d. Columns 1:2 are used as
%                         (row, col) pixel indices, columns 3:5 as the 3-D point.
%     depthVector       - the list of depths. Columns 1:2 are used as (row, col)
%                         pixel indices, column 3 as the depth value.
%     saveDataDirectory - some directory where the program can save an image
%                         showing its results (for debugging). Can be any
%                         scratch or temp directory, for example. It is used as
%                         a plain string prefix for the output file name, so it
%                         should end with a file separator.
%
%   Optional inputs (defaults in brackets):
%     regionHull     - K x 4 matrix; a point p is kept only if
%                      regionHull * [p 1]' > 1 holds for every row
%                      [zeros(0,4), i.e. keep everything].
%     imageWindow    - [rowStart colStart; rowEnd colEnd] window of interest
%                      [[180 176; 420 494]].
%     graspType      - string appended to the global trainingWeightsFilenameBase
%                      to pick the weights file ['eraser'].
%     verbose        - forwarded to the feature/detector helpers [0].
%     smoothFeatures - forwarded to graspRegionDetectorInImage [false].
%     medianFilter   - forwarded to graspRegionDetectorInImage [false].
%
%   Output:
%     X - 4 x K matrix; each column is one predicted handle [x; y; z; 1].
%         K is at most the global numHandlesToPredict and is smaller when the
%         detector finds no further candidate points.
%
%   Side effects visible in this file:
%     * calls perceptionParameters(saveDataDirectory)
%       (presumably initialises the globals declared below - confirm there);
%     * assigns the globals rackRows and rackCols;
%     * writes [saveDataDirectory 'LR_Picture_<pic>.jpg'];
%     * writes X' as ASCII to 'grasps.txt' in the current directory.

% This code is copyrighted by Stanford University.
% Author: Ashutosh Saxena, Justin Drieymeyer
% Code cannot be used for commercial purposes.
% Use of the code, if used with permission from authors, must acknowledge:
% Learning to Grasp Novel Objects using Vision,
% Ashutosh Saxena, Justin Driemeyer, Justin Kearns, Chioma Osondu, Andrew Y. Ng. In 10th International Symposium on Experimental Robotics, ISER, 2006.
% Robotic Grasping of Novel Obects,
% Ashutosh Saxena, Justing Driemeyer, Justin Kearns, Andrew Y. Ng, In Neural Information Processing Systems (NIPS), 2006.
% More details at: http://ai.stanford.edu/~asaxena/learninggrasp/
% Version 0.1: Aug 2006.

if nargin < 5
    regionHull = zeros(0, 4);
end
if nargin < 6
    imageWindow = [180 176; 420 494];
end
if nargin < 7
    graspType = 'eraser';
end
if nargin < 8
    verbose = 0;
end
if nargin < 9
    smoothFeatures = false;
end
if nargin < 10
    medianFilter = false;
end

doZooming = 0;      % hard-coded off: the multi-zoom branch below is currently inactive
calc3dFeats = 0;

%======to get images into "images" and arm coordinates=========
global realWorldFlag dishwasherDirectory rackRows rackCols numHandlesToPredict tfile
global trainingWeightsFilenameBase
perceptionParameters(saveDataDirectory);

if 1
    imcols = 64;
    imrows = 48;
    gridSize = 10;
    trainingWeightsFilenameToUse = [trainingWeightsFilenameBase graspType '.mat'];

    % Renormalise depth into [0, 1] over (nearDist, farDist]; anything beyond
    % farDist is clamped to 1.
    nearDist = 0;
    farDist = 1.2;
    segSize = farDist - nearDist;
    depthImage = zeros(imrows*gridSize, imcols*gridSize, 1);
    inThisSeg = (depthVector(:, 3) > nearDist) & (depthVector(:, 3) <= farDist);
    beyondThisSeg = (depthVector(:, 3) > farDist);
    renormedSegment = (inThisSeg .* (depthVector(:, 3) - nearDist) / segSize) + beyondThisSeg;
    depthSegments = [points(:, 1:2) renormedSegment];

    originalDepthMask = zeros(imrows*gridSize, imcols*gridSize);
    threeDPoints = zeros(imrows*gridSize, imcols*gridSize, 3);
    for pointsRow = 1:size(points, 1)
        thisHomoPoint = reshape([points(pointsRow, 3:5) 1], [4, 1]);
        % If the point is on the positive side of all region bounding planes include it, otherwise throw it out.
        if ((sum((regionHull*thisHomoPoint)>1))==size(regionHull,1))
            originalDepthMask(depthVector(pointsRow, 1), depthVector(pointsRow, 2)) = depthVector(pointsRow, 3);
            threeDPoints(points(pointsRow, 1), points(pointsRow, 2), :) = reshape(points(pointsRow, 3:5), [1, 1, 3]);
            depthImage(depthSegments(pointsRow, 1), depthSegments(pointsRow, 2), :) = depthSegments(pointsRow, 3);
        end
    end

    % The 3-D point image is divided by threeDPointsMax before resizing and
    % multiplied by it again afterwards. A maximum of zero would make that a
    % division by zero (NaN/Inf), so fall back to a neutral scale of 1, which
    % keeps the divide/multiply round trip exact.
    threeDPointsMax = max(max(max(threeDPoints)));
    if threeDPointsMax == 0
        threeDPointsMax = 1;
    end
    threeDPointsImage = (1 / threeDPointsMax) * threeDPoints;
    fullThreeDPointsVec = reshape(threeDPoints, [imrows*gridSize * imcols*gridSize, 3]);

    validDepthImage = depthImage > 0;

    % Per-pixel row / column index images (sized from the ORIGINAL input image).
    imageRowMask = repmat((1:size(images, 1))', [1 size(images, 2)]);
    imageColMask = repmat(1:size(images, 2), [size(images, 1), 1]);

    zoomRegionCenter = 0.5 * (imageWindow(1, :) + imageWindow(2, :));
    zoomRegionSize = imageWindow(2, :) - imageWindow(1, :) + [1 1];

    % Never zoom in further than maxZoomAmmount of the full image size.
    maxZoomAmmount = 5/8;
    if (zoomRegionSize(1, 1) < (size(images, 1)* maxZoomAmmount))
        zoomRegionSize(1, 1) = ceil(size(images, 1) * maxZoomAmmount);
    end
    if (zoomRegionSize(1, 2) < (size(images, 2) * maxZoomAmmount))
        zoomRegionSize(1, 2) = ceil(size(images, 2) * maxZoomAmmount);
    end

    %% Now resize either the rows or the columns so it maintains the correct
    %% aspect ratio
    desiredAspect = size(images, 1) / size(images, 2);
    zoomRegionAspect = zoomRegionSize(1, 1) / zoomRegionSize(1, 2);
    if (zoomRegionAspect < desiredAspect)
        zoomRegionSize(1, 1) = ceil(zoomRegionSize(1, 2) * desiredAspect);
    elseif (zoomRegionAspect > desiredAspect)
        zoomRegionSize(1, 2) = ceil(zoomRegionSize(1, 1) / desiredAspect);
    end

    %% Now set the zooming region and mask appropriately
    zoomRegionStart = ceil(zoomRegionCenter - floor(0.5 * zoomRegionSize));
    zoomRegionEnd = zoomRegionStart + zoomRegionSize - [1 1];
    % Shift the region back inside the image if it sticks out on either side.
    zoomRegionShiftUp = [1 1] - zoomRegionStart;
    imageDim = [size(images, 1) size(images, 2)];
    zoomRegionShiftDown = imageDim - zoomRegionEnd;
    zoomRegionShift = (zoomRegionShiftUp > 0) .* zoomRegionShiftUp + (zoomRegionShiftDown < 0) .* zoomRegionShiftDown;
    zoomRegionStart = zoomRegionStart + zoomRegionShift;
    zoomRegionEnd = zoomRegionEnd + zoomRegionShift;
    maskStart = ceil(((imageWindow(1, :) - zoomRegionStart) ./ zoomRegionSize) .* imageDim) + [1 1];
    maskEnd = ceil(((imageWindow(2, :) - zoomRegionStart) ./ zoomRegionSize) .* imageDim) + [1 1];

    % rackRows / rackCols are globals: the crop window is published to other code.
    rackRows = zoomRegionStart(1, 1) : zoomRegionEnd(1, 1);
    rackCols = zoomRegionStart(1, 2) : zoomRegionEnd(1, 2);
    images = images(rackRows, rackCols, :, :);
    depthImage = depthImage(rackRows, rackCols, :, :);
    threeDPointsImage = threeDPointsImage(rackRows, rackCols, :, :);
    validDepthImage = validDepthImage(rackRows, rackCols, :, :);

    % Scale factor from resized-image pixels back to original-image pixels
    % (imrows*gridSize = 480, imcols*gridSize = 640).
    rescaleVec = [length(rackRows)/(imrows*gridSize) length(rackCols)/(imcols*gridSize)];

    for pic = 1:size(images,4)
        images = imresize(images(:,:,:,pic), [imrows*gridSize, imcols*gridSize], 'bilinear');
        depthImage = repmat(depthImage, [1 1 3]);
        depthImage = imresize(depthImage, [imrows*gridSize, imcols*gridSize], 'bilinear');
        depthImage = depthImage(:, :, 1);
        validDepthImage = repmat(validDepthImage, [1 1 3]);
        validDepthImage = imresize(validDepthImage, [imrows*gridSize, imcols*gridSize], 'bilinear');
        validDepthImage = validDepthImage(:, :, 1);
        validDepthImage = validDepthImage >= .5;
        threeDPointsImage = imresize(threeDPointsImage, [imrows*gridSize, imcols*gridSize], 'bilinear');
        % Undo the normalisation and prepend (row, col) so each valid pixel
        % becomes one [row col x y z] record.
        threeDPointsVector = threeDPointsMax * threeDPointsImage;
        threeDPointsVector = cat(3, imageRowMask, imageColMask, threeDPointsVector);
        threeDPointsVector = reshape(threeDPointsVector, [imrows*gridSize*imcols*gridSize 5]);
        validVector = reshape(validDepthImage, [imrows*gridSize*imcols*gridSize 1]);
        threeDPointsVector = threeDPointsVector(validVector, :);
    end

    % Create discrete depth mask here
    discreteDepthImage = makeDiscreteDepthMask(depthImage, imrows, imcols, gridSize);

    % Flag invalid depth areas here (invalid pixels are set to -1)
    invalidDepthImage = ~validDepthImage .* (-1 * ones(size(depthImage)));
    depthImage = validDepthImage .* depthImage + invalidDepthImage;
    discreteDepthImage = validDepthImage .* discreteDepthImage + invalidDepthImage;

    farThresh = 1.05;   % 1.05 meters, approx reach of arm in dishwasher
    nearThresh = 0.65;  % 0.65 meters, approx near reach of arm in dishwasher
    originalDepthMask( originalDepthMask > farThresh ) = 0;
    originalDepthMask( originalDepthMask < nearThresh ) = 0;

    handleMedians = zeros(3,0);
    A_LR = images(:,:,:,pic);   % debug image; detected cells get painted into channel 1

    %cSet = floor([1 imcols*gridSize/4+1 imcols*gridSize/2+1]);
    %rSet = floor([1 imrows*gridSize/4+1 imrows*gridSize/2+1]);
    %zoomSets = [rSet(1) cSet(1); rSet(1) cSet(2); rSet(1) cSet(3); rSet(2) cSet(1); rSet(2) cSet(2); rSet(2) cSet(3); rSet(3) cSet(1); rSet(3) cSet(2); rSet(3) cSet(3)];
    %zoomSize = [imrows*gridSize/2 imcols*gridSize/2] - 1;
    %rescaleVec = [1/2 1/2];
    if (doZooming == 1)
        cSet = floor([1 imcols*gridSize/4+1]);
        rSet = floor([1 imrows*gridSize/4+1]);
        zoomSets = [rSet(1) cSet(1); rSet(1) cSet(2); rSet(2) cSet(1); rSet(2) cSet(2)];
        zoomRescaleVec = [3/4 3/4];
        zoomSize = [imrows*gridSize imcols*gridSize] .* zoomRescaleVec - [1 1];
        sizeOfRescaledRegion = zoomRescaleVec .* gridSize;
        baseConfidenceMask = zeros(imrows, imcols);
        baseNumOfPredictions = ones(imrows, imcols);
        numPicsToMake = size(zoomSets, 1);
        for zoomSetting = 1:numPicsToMake
            startCorner = zoomSets(zoomSetting, :);
            endCorner = startCorner + zoomSize - [1 1];
            thisImage = images(startCorner(1):endCorner(1), startCorner(2):endCorner(2), :);
            thisImage = imresize(thisImage, [imrows*gridSize, imcols*gridSize], 'bilinear');
            thisDepthImage = repmat(depthImage(startCorner(1):endCorner(1), startCorner(2):endCorner(2), :), [1 1 3]);
            thisDepthImage = imresize(thisDepthImage, [imrows*gridSize, imcols*gridSize], 'bilinear');
            thisDepthImage = thisDepthImage(:, :, 1);
            thisValidDepthImage = repmat(validDepthImage(startCorner(1):endCorner(1), startCorner(2):endCorner(2), :), [1 1 3]);
            thisValidDepthImage = imresize(thisValidDepthImage, [imrows*gridSize, imcols*gridSize], 'bilinear');
            thisValidDepthImage = thisValidDepthImage(:, :, 1);
            thisValidDepthImage = thisValidDepthImage >= .5;

            % Create discrete depth mask here
            thisDiscreteDepthImage = makeDiscreteDepthMask(thisDepthImage, imrows, imcols, gridSize);

            % Flag invalid depth areas here
            thisInvalidDepthImage = ~thisValidDepthImage .* (-1 * ones(size(thisDepthImage)));
            thisDepthImage = thisValidDepthImage .* thisDepthImage + thisInvalidDepthImage;
            thisDiscreteDepthImage = thisValidDepthImage .* thisDiscreteDepthImage + thisInvalidDepthImage;

            thisPointsVector = zeros(0, 5);
            this3DPointsImage = threeDPointsImage(startCorner(1):endCorner(1), startCorner(2):endCorner(2), :);
            this3DPointsImage = imresize(this3DPointsImage, [imrows*gridSize, imcols*gridSize], 'bilinear');
            this3DPointsImage = threeDPointsMax * this3DPointsImage;
            this3DPointsImage = cat(3, imageRowMask, imageColMask, this3DPointsImage);
            this3DPointsVector = reshape(this3DPointsImage, [imrows*gridSize*imcols*gridSize 5]);
            thisValidVector = reshape(thisValidDepthImage, [imrows*gridSize*imcols*gridSize 1]);
            this3DPointsVector = this3DPointsVector(find(thisValidVector), :);

            [labeledVector, confidenceVector] = graspRegionDetectorInImage(thisImage, thisImage, thisDepthImage, thisDiscreteDepthImage, ones(imrows*gridSize, imcols*gridSize), this3DPointsVector, smoothFeatures, medianFilter, calc3dFeats, 1);

            confRowVec = floor((((1:imrows) - 1) * sizeOfRescaledRegion(1) + (startCorner(1)-1)) / gridSize) + 1;
            confColVec = floor((((1:imcols) - 1) * sizeOfRescaledRegion(2) + (startCorner(2)-1)) / gridSize) + 1;
            %[min(confRowVec) max(confRowVec)]
            %[min(confColVec) max(confColVec)]
            %confidenceStartCorner = ((startCorner-1) ./ gridSize) + 1;
            %confidenceEndCorner = floor((endCorner-1) ./ gridSize ./ zoomRescaleVec) + 1;
            %confidenceStartCorner
            %confidenceEndCorner
            %size(baseConfidenceMask)
            thisConfidenceMask = baseConfidenceMask(confRowVec, confColVec) + reshape(confidenceVector, [imrows, imcols]);
            baseConfidenceMask(confRowVec, confColVec) = thisConfidenceMask;
            baseNumOfPredictions(confRowVec, confColVec) = baseNumOfPredictions(confRowVec, confColVec) + ones(imrows, imcols);
        end

        scaledBaseConfidenceMask = baseConfidenceMask ./ baseNumOfPredictions;
        scaledBaseNumOfPredictions = ones(size(scaledBaseConfidenceMask));

        imageBlackAndWhite = (double(images(:, :, 1)) + double(images(:, :, 2)) + double(images(:, :, 3))) / (3*255);
        scaledBaseConfidenceMaskLarge = zeros(size(images(:,:,1)));
        for r = 1:size(scaledBaseConfidenceMask, 1)
            for c = 1:size(scaledBaseConfidenceMask, 2)
                scaledBaseConfidenceMaskLarge((((r-1)*gridSize) + 1):(r*gridSize), (((c-1)*gridSize) + 1):(c*gridSize), :) = scaledBaseConfidenceMask(r, c);
            end
        end
        scaledBaseConfidenceMaskLarge = scaledBaseConfidenceMaskLarge / max(max(scaledBaseConfidenceMaskLarge));
        imageOverlay = cat(3, scaledBaseConfidenceMaskLarge, zeros(size(imageBlackAndWhite)), imageBlackAndWhite);
        if (verbose)
            figure;
            subplot(2, 1, 1), image(images);
            axis equal;
            subplot(2, 1, 2), image(imageOverlay);
            axis equal;
        end
        %for row = 1:orgConfMaskSize(1)
        %    for col = 1:orgConfMaskSize(2)
        %        rescaledCoord = round([row col] .* zoomRescaleVec);
        %        baseConfidenceMask(rescaledCoord(1), rescaledCoord(2)) = orgConfidenceMask(row,col);
        %        baseNumOfPredictions(rescaledCoord(1), rescaledCoord(2)) = orgNumOfPredictions(row,col);
        %    end
        %end
    else
        scaledBaseConfidenceMask = zeros(imrows, imcols);
        scaledBaseNumOfPredictions = zeros(imrows, imcols);
    end

    orgScaledConfMask = scaledBaseConfidenceMask;
    orgScaledNumOfPred = scaledBaseNumOfPredictions;
    distanceOnlyDepthMask = originalDepthMask;

    testFeatureVector = makeFeatureVector(images, depthImage, discreteDepthImage, threeDPointsVector, calc3dFeats, verbose);

    for thisHandleNum = 1:numHandlesToPredict
        % Map every pixel of the resized image back to its source pixel in
        % the original image and sample the depth masks there. A matrix index
        % lookup gives exactly the values the former per-pixel double loop
        % produced.
        rackRows1 = rackRows(1);
        rackCols1 = rackCols(1);
        originalCoords_y = floor( (0:(imrows*gridSize-1)) * rescaleVec(1) + rackRows1);
        originalCoords_x = floor( (0:(imcols*gridSize-1)) * rescaleVec(2) + rackCols1);
        %originalCoords = round( [y x] .* rescaleVec + [rackRows1 rackCols1] );
        rescaledDepthMask = double(originalDepthMask(originalCoords_y, originalCoords_x));
        % NOTE(review): distanceOnlyRescaledDepthMask is computed but never read.
        distanceOnlyRescaledDepthMask = double(distanceOnlyDepthMask(originalCoords_y, originalCoords_x));

        dishwasherMask = zeros(imrows*gridSize, imcols*gridSize);
        maskGridStart = max(floor((maskStart - [1 1] ) ./ gridSize), [2 2]);
        maskGridEnd = min(floor((maskEnd - [1 1] ) ./ gridSize), [47 63]);
        % NOTE(review): the window-derived grid limits computed just above are
        % immediately replaced by these fixed values; kept as in the original.
        maskGridStart = [7 7];
        maskGridEnd = [34 54];
        dishwasherMask(maskGridStart(1, 1)*gridSize-(gridSize-1):maskGridEnd(1, 1)*gridSize, ...
                       maskGridStart(1, 2)*gridSize-(gridSize-1):maskGridEnd(1, 2)*gridSize) ...
            = ones((maskGridEnd - maskGridStart + [1 1]) * gridSize);
        % if (forIntegration == 1)
        %     dishwasherMask(7*gridSize-9:34*gridSize, 7*gridSize-9:54*gridSize) = ones(28*gridSize, 48*gridSize);
        % elseif (forIntegration == 2)
        %     dishwasherMask(3*gridSize-9:32*gridSize, 3*gridSize-9:62*gridSize) = ones(30*gridSize, 60*gridSize);
        % else
        %     dishwasherMask(5*gridSize-9:40*gridSize, 10*gridSize-9:58*gridSize) = ones(36*gridSize, 49*gridSize);
        % end

        depthMask = double(rescaledDepthMask ~= 0) & dishwasherMask;
        distanceOnlyDepthMask = double(rescaledDepthMask ~= 0) & dishwasherMask;

        scaledBaseConfidenceMask = orgScaledConfMask;
        scaledBaseNumOfPredictions = orgScaledNumOfPred;

        if 1 %average two predictions
            % [labeledVector, confidenceVector] = graspRegionDetectorInImage(images, images, depthImage, discreteDepthImage, depthMask, threeDPointsVector, smoothFeatures, medianFilter, calc3dFeats, 0, 1, scaledBaseConfidenceMask, scaledBaseNumOfPredictions, true, true, trainingWeightsFilename1);
            % scaledBaseConfidenceMask = scaledBaseConfidenceMask + reshape(confidenceVector, [imrows, imcols]);
            % scaledBaseNumOfPredictions = scaledBaseNumOfPredictions + ones(imrows, imcols);
            % [labeledVector, confidenceVector] = graspRegionDetectorInImage(images, images, depthImage, discreteDepthImage, depthMask, threeDPointsVector, smoothFeatures, medianFilter, calc3dFeats, 0, 1, scaledBaseConfidenceMask, scaledBaseNumOfPredictions, true, true, trainingWeightsFilename2);
            [labeledVector, confidenceVector] = graspRegionDetectorInImage(images, images, depthImage, discreteDepthImage, depthMask, threeDPointsVector, smoothFeatures, medianFilter, calc3dFeats, 0, 1, scaledBaseConfidenceMask, scaledBaseNumOfPredictions, true, verbose, trainingWeightsFilenameToUse, testFeatureVector);
            scaledBaseConfidenceMask = scaledBaseConfidenceMask + reshape(confidenceVector, [imrows, imcols]);
            scaledBaseNumOfPredictions = scaledBaseNumOfPredictions + ones(imrows, imcols);
        else %treat predictions as bayes rule and multiply
            % [labeledVector, confidenceVector] = graspRegionDetectorInImage(images, images, depthImage, discreteDepthImage, depthMask, threeDPointsVector, smoothFeatures, medianFilter, calc3dFeats, 1, 1, scaledBaseConfidenceMask, scaledBaseNumOfPredictions, true, true, trainingWeightsFilename3);
            % scaledBaseConfidenceMask = scaledBaseConfidenceMask + reshape(confidenceVector, [imrows, imcols]);
            % scaledBaseNumOfPredictions = scaledBaseNumOfPredictions + ones(imrows, imcols);
            [labeledVector, confidenceVector] = graspRegionDetectorInImage(images, images, depthImage, discreteDepthImage, distanceOnlyDepthMask, threeDPointsVector, smoothFeatures, medianFilter, calc3dFeats, 1, 1, scaledBaseConfidenceMask, scaledBaseNumOfPredictions, true, verbose, trainingWeightsFilenameToUse);
            scaledBaseConfidenceMask = reshape(confidenceVector, [imrows, imcols]);
            scaledBaseNumOfPredictions = scaledBaseNumOfPredictions + ones(imrows, imcols);

            doWhiteout = 0;
            if doWhiteout
                whiteMask = scaledBaseConfidenceMask < 0.1836;
                whiteMask = repmat(whiteMask, [1 1 3]);
                multiplier = 1;
                whiteMask = imresize(whiteMask, [480 640]);
                if (max(max(max(images))) > 1)
                    whiteMask = 255 * whiteMask;
                end
                newImage = double(images);
                newImage = uint8(max(newImage, whiteMask));
            else
                newImage = images;
            end
            [labeledVector, confidenceVector] = graspRegionDetectorInImage(images, newImage, depthImage, discreteDepthImage, depthMask, threeDPointsVector, smoothFeatures, medianFilter, calc3dFeats, 1, 1, scaledBaseConfidenceMask, scaledBaseNumOfPredictions, true, verbose, trainingWeightsFilenameToUse);
            scaledBaseConfidenceMask = scaledBaseConfidenceMask + reshape(confidenceVector, [imrows, imcols]);
            scaledBaseNumOfPredictions = scaledBaseNumOfPredictions + ones(imrows, imcols);
        end

        % Collect the valid 3-D points lying under every grid cell that the
        % detector labelled as a grasp region.
        confidence = [ ];
        nRegions = zeros( size(images,4),1);
        handleCoords = [];
        for pic = 1:size(images,4)
            A_Labels = reshape(labeledVector((pic-1)*imcols*imrows + 1:(pic)*imcols*imrows, 1), [imrows, imcols]);
            A_Confidence = reshape(confidenceVector( (pic-1)*imcols*imrows + 1:pic*imcols*imrows, 1), [imrows, imcols]);
            H = rgb2hsv(images(:,:,:,pic));
            objectMask = H(:,:,2) > mean(mean( H(:,:,2) ));
            objectMaskScaled = zeros(imrows,imcols);
            for y = 1:imrows %48
                for x = 1:imcols %64
                    objectMaskScaled(y, x) = mean(mean(objectMask(10*y-9:y*10, 10*x-9:x*10))) > 0.5;
                    if A_Labels(y, x)% && objectMaskScaled(y, x)
                        A_LR(gridSize*y-(gridSize-1):y*gridSize, gridSize*x-(gridSize-1):x*gridSize, 1) = 256;
                        confidence = [confidence; A_Confidence(y,x)];
                        nRegions(pic) = nRegions(pic) + 1;
                        % Cell corners in resized-image pixels, then mapped
                        % back to original-image pixels.
                        botRightCorner = gridSize * [y x];
                        topLeftCorner = botRightCorner - gridSize + 1;
                        botRightCorner = floor((botRightCorner - 1) .* rescaleVec + [rackRows(1) rackCols(1)]);
                        topLeftCorner = floor((topLeftCorner - 1) .* rescaleVec + [rackRows(1) rackCols(1)]);
                        thisRegion = threeDPoints(topLeftCorner(1):botRightCorner(1), topLeftCorner(2):botRightCorner(2), :);
                        numPointsInRegion = size(thisRegion, 1) * size(thisRegion, 2);
                        pickup = reshape(thisRegion, [numPointsInRegion, 3]);
                        pickupsValid = reshape(originalDepthMask(topLeftCorner(1):botRightCorner(1), topLeftCorner(2):botRightCorner(2), :), [numPointsInRegion, 1]);
                        pickup = pickup(pickupsValid ~= 0, :);
                        handleCoords = [handleCoords; pickup];
                    end
                end
            end
            %imshow(objectMaskScaled)false
            imwrite(A_LR, [saveDataDirectory sprintf('LR_Picture_%d.jpg', pic)], 'jpg');
        end

        % No candidate points at all: there is nothing to pick a handle from.
        % Previously min([]) produced an empty index and the subtraction
        % against fullThreeDPointsVec below failed with a dimension mismatch.
        % originalDepthMask would not change either, so later iterations of
        % this loop start from the same inputs; stop here and return the
        % handles found so far.
        if isempty(handleCoords)
            break;
        end

        % Pick the candidate point with the smallest summed Euclidean
        % distance to all other candidate points.
        handleMedian = zeros(3,1);
        numCandidates = size(handleCoords, 1);
        distVec = zeros(1, numCandidates);
        for n = 1:numCandidates
            distMat = repmat(handleCoords(n,:), numCandidates, 1) - handleCoords;
            distMat = distMat.^2;
            distMat = sum(distMat, 2);
            distMat = sqrt(distMat);
            distVec(n) = sum(distMat);
        end
        [temp, index] = min(distVec);
        thisHandle = handleCoords(index, :)';
        handleMedians = [handleMedians thisHandle];

        % Now remove points surrounding this handle and predict anew
        distMat = repmat(thisHandle', size(fullThreeDPointsVec, 1), 1) - fullThreeDPointsVec;
        distMat = distMat.^2;
        distMat = sum(distMat, 2);
        distMat = sqrt(distMat);
        keepThese = distMat > 0.05;
        keepThese = reshape(keepThese, size(originalDepthMask));
        originalDepthMask = originalDepthMask .* keepThese;
    end

    % Return the handles in homogeneous coordinates and dump them to disk.
    X = [handleMedians; ones(1, size(handleMedians, 2))];
    Y = X';
    save 'grasps.txt' Y -ascii
end
return;