Hi,
I've been trying out Boost.Proto lately and I've noticed that, for my
expressions, it tends to force my operands to be created on the stack. For
example, instead of optimizing out an object completely when everything
about it is known at compile time, the compiler (MSVC 2005 or 2008) creates
that object on the stack anyway, even though it's not necessary. Even worse,
it actually folds the simpler expressions at compile time and never even
touches the data that it just put on the stack. The problem is compounded
with larger expressions involving many operands. It even seems to build the
larger expression objects on the stack unnecessarily (and this looks like it
starts to break constant folding).
Here's some of the generated assembly:
int main()
{
00401800 sub esp,18h
vector2< float[3] > v1 = { 0, 1, 2 };
00401803 xorps xmm0,xmm0
vector2< float[3] > v2 = { 3, 4, 5 };
vector2< float[3] > v3 = { 6, 7, 8 };
// Add two vectors lazily and get the first element (index 0).
#if 1
// this sticks temporary junk all over the stack, even with 2 terms
std::cout << (v1 + v2)[0];
00401806 fld dword ptr [__real@40400000 (402128h)]
0040180C movss dword ptr [esp],xmm0
00401811 movss xmm0,dword ptr [__real@3f800000 (402124h)]
00401819 movss dword ptr [esp+4],xmm0
0040181F movss xmm0,dword ptr [__real@40000000 (402120h)]
00401827 movss dword ptr [esp+8],xmm0
0040182D movss xmm0,dword ptr [__real@40400000 (402128h)]
00401835 movss dword ptr [esp+0Ch],xmm0
0040183B movss xmm0,dword ptr [__real@40800000 (40211Ch)]
00401843 push ecx
00401844 mov ecx,dword ptr [__imp_std::cout (402044h)]
0040184A fstp dword ptr [esp]
0040184D movss dword ptr [esp+14h],xmm0
00401853 movss xmm0,dword ptr [__real@40a00000 (402118h)]
0040185B movss dword ptr [esp+18h],xmm0
00401861 call dword ptr
[__imp_std::basic_ostream
::type::value_type result_type;
result_type operator ()(Expr const & expr, subscript_context
const & ctx) const
{
return proto::arg(expr)[ctx.i];
}
};
};
// Here is the domain-specific expression wrapper, which overrides
// operator [] to evaluate the expression using the subscript_context.
//
// Expr is the wrapped proto expression type. The wrapper derives from
// proto::extends so that it is itself a proto expression in VectorDomain.
template< typename Expr >
struct vector_expr
  : proto::extends< Expr, vector_expr< Expr >, VectorDomain >
{
    typedef proto::extends< Expr, vector_expr< Expr >, VectorDomain > base_type;

    // Wrap an existing proto expression by forwarding it to the
    // proto::extends base.
    vector_expr(Expr const & expr)
      : base_type(expr)
    { }

    // Use the subscript_context to implement subscripting
    // of a lazy vector expression tree: a context is built for index i
    // and the whole expression is evaluated with it.
    typename proto::result_of::eval< Expr, subscript_context const >::type
    operator [](std::size_t i) const
    {
        subscript_context const ctx(i);
        return proto::eval(*this, ctx);
    }
};
// Tell proto that in the VectorDomain, all // expressions should be wrapped in vector_expr<> struct VectorDomain : proto::domain< proto::generator< vector_expr >, VectorGrammar > { }; template< typename T > struct is_vector : mpl::false_ { }; template< typename T > struct is_vector< vector2< T > > : mpl::true_ { }; BOOST_PROTO_DEFINE_BINARY_OPERATOR(+, boost::proto::tag::plus, is_vector, VectorDomain); int main() { vector2< float[3] > v1 = { 0, 1, 2 }; vector2< float[3] > v2 = { 3, 4, 5 }; vector2< float[3] > v3 = { 6, 7, 8 }; // Add two vectors lazily and get the 2nd element. #if 1 // this sticks write-only temporary junk all over the stack, even with 2 terms std::cout << (v1 + v2)[0];// + v3 + v3 + v3 + v3 + v3)[0]; #else // this optimizes out completely for 2 terms... expanding to 3 terms does not typedef proto::terminal< vector2< float[3] > >::type expr_type; std::cout << (vector_expr< expr_type >( expr_type::make(v2) ) + vector_expr< expr_type >( expr_type::make(v3) ))[0]; #endif std::cin.get(); return 0; } Thanks in advance. // AJD < n.outpost@gmail.com >