From 94854170e7d6ca6ad85cda6469dcf4d836162984 Mon Sep 17 00:00:00 2001
From: Naoaki Okazaki <okazaki@chokkan.org>
Date: Wed, 17 Nov 2010 09:25:11 +0900
Subject: [PATCH] Patch submitted by Nic Schraudolph to fix compiling problems
 on Mac OS X.

---
 ChangeLog                   |  6 ++++++
 include/lbfgs.h             |  4 ++++
 lib/arithmetic_sse_double.h |  8 ++++++--
 lib/arithmetic_sse_float.h  | 10 +++++++++-
 4 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index b85e87d..d205e2e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2010-xx-xx  Naoaki Okazaki  <okazaki at chokkan org>
+
+	* libLBFGS 1.10:
+	- Fixed compilation errors on Mac OS X; this patch was kindly submitted by Nic Schraudolph.
+
+
 2010-01-29  Naoaki Okazaki  <okazaki at chokkan org>
 
 	* libLBFGS 1.9:
diff --git a/include/lbfgs.h b/include/lbfgs.h
index 7aa7b1f..2cb48cf 100644
--- a/include/lbfgs.h
+++ b/include/lbfgs.h
@@ -598,6 +598,9 @@ libLBFGS is distributed under the term of the
 <a href="http://opensource.org/licenses/mit-license.php">MIT license</a>.
 
 @section changelog History
+- Version 1.10 (2010-xx-xx):
+    - Fixed compilation errors on Mac OS X; this patch was kindly submitted by
+      Nic Schraudolph.
 - Version 1.9 (2010-01-29):
     - Fixed a mistake in checking the validity of the parameters "ftol" and
       "wolfe"; this was discovered by Kevin S. Van Horn.
@@ -718,6 +721,7 @@ Special thanks go to:
     - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about
       OWL-QN
     - Takashi Imamichi for the useful enhancements of the backtracking method
+    - Kevin S. Van Horn and Nic Schraudolph for bug fixes
 
 Finally I would like to thank the original author, Jorge Nocedal, who has been
 distributing the effieicnt and explanatory implementation in an open source
diff --git a/lib/arithmetic_sse_double.h b/lib/arithmetic_sse_double.h
index ac3be80..7c25660 100644
--- a/lib/arithmetic_sse_double.h
+++ b/lib/arithmetic_sse_double.h
@@ -26,7 +26,9 @@
 /* $Id$ */
 
 #include <stdlib.h>
+#ifndef __APPLE__
 #include <malloc.h>
+#endif
 #include <memory.h>
 
 #if     1400 <= _MSC_VER
@@ -39,8 +41,10 @@
 
 inline static void* vecalloc(size_t size)
 {
-#ifdef	_MSC_VER
+#if     defined(_MSC_VER)
     void *memblock = _aligned_malloc(size, 16);
+#elif   defined(__APPLE__)  /* malloc() on OS X always returns 16-byte-aligned memory */
+    void *memblock = malloc(size);
 #else
     void *memblock = memalign(16, size);
 #endif
@@ -192,7 +196,7 @@ inline static void vecfree(void *memblock)
 
 
 
-#if     3 <= __SSE__
+#if     3 <= __SSE__ || defined(__SSE3__)
 /*
     Horizontal add with haddps SSE3 instruction. The work register (rw)
     is unused.
diff --git a/lib/arithmetic_sse_float.h b/lib/arithmetic_sse_float.h
index b04bedf..c5042bd 100644
--- a/lib/arithmetic_sse_float.h
+++ b/lib/arithmetic_sse_float.h
@@ -26,7 +26,9 @@
 /* $Id$ */
 
 #include <stdlib.h>
+#ifndef __APPLE__
 #include <malloc.h>
+#endif
 #include <memory.h>
 
 #if     1400 <= _MSC_VER
@@ -45,7 +47,13 @@
 
 inline static void* vecalloc(size_t size)
 {
+#if     defined(_MSC_VER)
     void *memblock = _aligned_malloc(size, 16);
+#elif   defined(__APPLE__)  /* malloc() on OS X always returns 16-byte-aligned memory */
+    void *memblock = malloc(size);
+#else
+    void *memblock = memalign(16, size);
+#endif
     if (memblock != NULL) {
         memset(memblock, 0, size);
     }
@@ -185,7 +193,7 @@ inline static void vecfree(void *memblock)
 
 
 
-#if     3 <= __SSE__
+#if     3 <= __SSE__ || defined(__SSE3__)
 /*
     Horizontal add with haddps SSE3 instruction. The work register (rw)
     is unused.