summaryrefslogtreecommitdiffstats
path: root/src/kvilib/core/kvi_memmove.cpp
blob: 504cf6c8b6bd27467aa64dc5c9c1cf038dc7ce31 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
//=============================================================================
//
//   File : kvi_memmove.cpp
//   Creation date : Sun Jun 18 2000 18:27:50 CEST by Szymon Stefanek
//
//   This file is part of the KVirc irc client distribution
//   Copyright (C) 1999-2006 Szymon Stefanek (pragma at kvirc dot net)
//
//   This program is FREE software. You can redistribute it and/or
//   modify it under the terms of the GNU General Public License
//   as published by the Free Software Foundation; either version 2
//   of the License, or (at your option) any later version.
//
//   This program is distributed in the HOPE that it will be USEFUL,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//   See the GNU General Public License for more details.
//
//   You should have received a copy of the GNU General Public License
//   along with this program. If not, write to the Free Software Foundation,
//   Inc. ,51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
//=============================================================================

#define __KVILIB__

#define _KVI_DEBUG_CHECK_RANGE_
#include "kvi_debug.h"

#define _KVI_MEMMOVE_CPP_
#include "kvi_memmove.h"

// FIXME: #warning "With system memmove could be guessed by configure"

#ifndef COMPILE_WITH_SYSTEM_MEMMOVE
	
	#ifdef COMPILE_ix86_ASM


		// Overlap-safe memory move (hand-rolled i386 assembly version).
		// Copies len bytes from src_ptr to dst_ptr, choosing the copy
		// direction so overlapping regions are handled correctly:
		// backward when dst > src, forward otherwise. Returns dst_ptr.
		//
		// NOTE(review): these are "basic" asm statements with no operand or
		// clobber lists. Correctness depends on gcc keeping the standard
		// %ebp frame (args at 8/12/16(%ebp)) and not reordering these
		// statements -- fragile under optimization; an extended-asm rewrite
		// with proper constraints would be safer.
		// NOTE(review): the asm labels below are file-global symbols; a
		// second expansion of this code in one translation unit would cause
		// duplicate-label assembler errors.
		// NOTE(review): the backward path executes `std` but never `cld`
		// before returning; the i386 SysV ABI expects DF clear across calls
		// -- compiler-generated string ops in the caller may misbehave.
		// NOTE(review): `repnz; movsl` -- the REPNE prefix is architecturally
		// undefined for MOVS; `rep movsl` is what was intended (most CPUs
		// treat the F2 prefix as plain REP here, but it is unspecified).
		void *kvi_memmove(void * dst_ptr,const void *src_ptr,int len)
		{
			__range_valid(dst_ptr);
			__range_valid(src_ptr);
			__range_valid(len >= 0);
			// Save pointer registers
			asm("	pushl %esi");                     // save %esi
			asm("	pushl %edi");                     // save %edi
			// Load arguments
			asm("	movl 16(%ebp),%ecx");             // %ecx = len
			asm("	movl 12(%ebp),%esi");             // %esi = src
			asm("	movl 8(%ebp),%edi");              // %edi = dst
			// Compare src and dest
			asm("	cmpl %esi,%edi");                 // %edi - %esi
			asm("	jbe move_from_bottom_to_top");    // if(%edi <= %esi) jump to move_from_bottom_to_top (jbe = unsigned <=)
			// dst_ptr > src_ptr : copy backward starting from the last byte
			asm("	addl %ecx,%esi");                 // %esi += %ecx (src_ptr += len);
			asm("	addl %ecx,%edi");                 // %edi += %ecx (dst_ptr += len);
			asm("	decl %esi");                      // %esi--; now points at the last source byte
			asm("	decl %edi");                      // %edi--; now points at the last destination byte
			asm("	std");                            // set direction flag (movs* decrement esi and edi)
			// Optimization : check for an odd len (1,3,5,7...)
			asm("	shr $1,%ecx");                    // %ecx >> 1 , shifted-out bit -> CF
			asm("	jnc move_two_bytes_top_to_bottom_directly");  // if !carry (CF == 0) skip this move
			// Move one byte first (len was odd)
			asm("	movsb %ds:(%esi),%es:(%edi)");    // *dst-- = *src-- (DF is set)
			asm("move_two_bytes_top_to_bottom_directly:");
			asm("	decl %esi");                      // %esi--; re-align pointer for the word move below
			asm("	decl %edi");                      // %edi--;
			asm("move_two_bytes_top_to_bottom:");
			asm("	shr $1,%ecx");                    // %ecx >> 1 , shifted-out bit -> CF
			asm("	jnc move_the_rest_top_to_bottom_directly"); // if !carry (CF == 0) skip this move
			// Move a 2-byte remainder
			asm("	movsw %ds:(%esi),%es:(%edi)");    // *((word *)dst)-- = *((word *)src)-- (DF is set)
			asm("move_the_rest_top_to_bottom_directly:");
			asm("	subl $2,%esi");                   // %esi-=2; re-align pointer for the dword moves below
			asm("   subl $2,%edi");                   // %edi-=2;
			asm("   jmp move_the_rest");              // jump to the final string move
			// dst_ptr <= src_ptr : copy forward (also correct when dst == src)
			asm("move_from_bottom_to_top:");
			asm("	cld");                            // clear direction flag (movs* increment esi and edi)
			// Optimization : check for an odd len (1,3,5,7...)
			asm("	shr $1,%ecx");                    // %ecx >> 1 , shifted-out bit -> CF
			asm("	jnc move_two_bytes");             // if !carry (CF == 0) skip this move
			// Move one byte first (len was odd)
			asm("	movsb %ds:(%esi),%es:(%edi)");    // *dst++ = *src++ (DF is clear)
			// Optimization : pass 2 , move a 2-byte remainder if present
			asm("move_two_bytes:");
			asm("	shr $1,%ecx");                    // %ecx >> 1 , shifted-out bit -> CF
			asm("	jnc move_the_rest");              // if !carry (CF == 0) skip this move
			// Move a 2-byte remainder
			asm("	movsw %ds:(%esi),%es:(%edi)");    // *((word *)dst)++ = *((word *)src)++ (DF is clear)
			// Main move : remaining len/4 dwords
			asm("move_the_rest:");
			asm("	repnz; movsl %ds:(%esi),%es:(%edi)"); // NOTE(review): should be `rep movsl` -- see header note
			// Restore pointer registers
			asm("	popl %edi");                      // restore %edi
			asm("	popl %esi");                      // restore %esi
			return dst_ptr; //asm("   movl 8(%ebp),%eax"); <-- gcc will put that (AFTER THE OPTIMISATION PASS!)
		}
	
		// Overlap-safe memory move for even byte counts (i386 assembly).
		// Same contract as kvi_memmove() but len must be a multiple of 2
		// -- the C fallback asserts __range_valid((len & 1) == 0) while this
		// version does not (NOTE(review): consider adding the same check
		// here for consistency). Returns dst_ptr.
		//
		// NOTE(review): same caveats as the asm kvi_memmove(): basic asm
		// relying on the %ebp argument layout, file-global labels, DF left
		// set on the backward path (no `cld` before return), and
		// `repnz; movsl` where `rep movsl` is intended.
		void *kvi_memmoveodd(void * dst_ptr,const void *src_ptr,int len)
		{
			__range_valid(dst_ptr);
			__range_valid(src_ptr);
			__range_valid(len >= 0);
			// Save pointer registers
			asm("	pushl %esi");                     // save %esi
			asm("	pushl %edi");                     // save %edi
			// Load arguments
			asm("	movl 16(%ebp),%ecx");             // %ecx = len
			asm("	movl 12(%ebp),%esi");             // %esi = src
			asm("	movl 8(%ebp),%edi");              // %edi = dst
			// Compare src and dest
			asm("	cmpl %esi,%edi");                 // %edi - %esi
			asm("	jbe xmove_from_bottom_to_top");    // if(%edi <= %esi) jump to xmove_from_bottom_to_top (jbe = unsigned <=)
			// dst_ptr > src_ptr : copy backward from the end of the buffers
			asm("	addl %ecx,%esi");                 // %esi += %ecx (src_ptr += len);
			asm("	addl %ecx,%edi");                 // %edi += %ecx (dst_ptr += len);
			asm("	std");                            // set direction flag (movs* decrement esi and edi)
			// start moving : divide len by 4, CF = there is a 2-byte remainder
			asm("	shr $2,%ecx");                    // %ecx >> 2 , last shifted-out bit -> CF
			asm("	jnc xmove_the_rest_top_to_bottom_directly"); // if !carry (CF == 0) skip this move
			// Move the trailing two bytes first
			asm("	subl $2,%esi");                   // %esi-=2; (src_ptr-=2);
			asm("	subl $2,%edi");                   // %edi-=2; (dst_ptr-=2);
			asm("	movsw %ds:(%esi),%es:(%edi)");    // *((word *)dst)-- = *((word *)src)-- (DF is set)
			asm("	subl $2,%esi");                   // %esi-=2; re-align pointer for the dword moves
			asm("	subl $2,%edi");                   // %edi-=2;
			asm("   jmp xmove_the_rest");
			asm("xmove_the_rest_top_to_bottom_directly:");
			asm("	subl $4,%esi");                   // %esi-=4; point at the last full dword
			asm("   subl $4,%edi");                   // %edi-=4;
			asm("   jmp xmove_the_rest");              // jump to the final string move
			// dst_ptr <= src_ptr : copy forward (also correct when dst == src)
			asm("xmove_from_bottom_to_top:");
			asm("	cld");                            // clear direction flag (movs* increment esi and edi)
			// move it : divide len by 4, CF = there is a 2-byte remainder
			asm("	shr $2,%ecx");                    // %ecx >> 2 , last shifted-out bit -> CF
			asm("	jnc xmove_the_rest");              // if !carry (CF == 0) skip this move
			// Move the 2-byte remainder first
			asm("	movsw %ds:(%esi),%es:(%edi)");    // *((word *)dst)++ = *((word *)src)++ (DF is clear)
			// Main move : remaining len/4 dwords
			asm("xmove_the_rest:");
			asm("	repnz; movsl %ds:(%esi),%es:(%edi)"); // NOTE(review): should be `rep movsl` -- see header note
			// Restore pointer registers
			asm("	popl %edi");                      // restore %edi
			asm("	popl %esi");                      // restore %esi
			return dst_ptr; //asm("   movl 8(%ebp),%eax"); <-- gcc will put that (AFTER THE OPTIMISATION PASS!)
		}

	#else // ndef COMPILE_ix86_ASM
	


		// The next 4 functions could be optimized with the & and shift technique
		// used in the assembly implementations but the compilers usually
		// will not translate the carry bit trick producing code
		// that works slower on short block of memory (really near the average case)
	
		// The trick would be:
		//
		//    if(len & 1) // the length is even
		//       *dst-- = *src--; // move one byte
		//    len >> 1; // drop the last bit (thus divide by 2)
		//    if(len & 1) // the length is still even
		//       *((short *)dst)-- = *((short *)src)--; // move two bytes
		//    len >> 1; // again drop the last bit (thus divide by 2)
		//    while(len--)*((int *)dst)-- = *((int *)src)--; // move four bytes at a time
		//
		//
	
		// Overlap-safe memory move (portable C fallback).
		// Copies len bytes from src_ptr to dst_ptr and returns dst_ptr.
		// The copy direction is chosen from the pointer order so that
		// overlapping source/destination regions are handled correctly.
		void *kvi_memmove(void *dst_ptr,const void *src_ptr,int len)
		{
			__range_valid(dst_ptr);
			__range_valid(src_ptr);
			__range_valid(len >= 0);
			if(dst_ptr > src_ptr)
			{
				// Destination sits above the source: walk backwards so each
				// source byte is read before it can be overwritten.
				char * d = ((char *)dst_ptr) + len;
				const char * s = ((const char *)src_ptr) + len;
				while(len--)*(--d) = *(--s);
			} else {
				// Forward copy is safe when dst <= src (also when equal).
				char * d = (char *)dst_ptr;
				const char * s = (const char *)src_ptr;
				while(len--)*d++ = *s++;
			}
			return dst_ptr;
		}
	
		// Overlap-safe memory move for even byte counts (portable C fallback).
		// Copies len bytes (len MUST be a multiple of 2) from src_ptr to
		// dst_ptr two bytes at a time and returns dst_ptr. Direction is
		// chosen from the pointer order, as in kvi_memmove().
		void *kvi_memmoveodd(void *dst_ptr,const void *src_ptr,int len)
		{
			__range_valid(dst_ptr);
			__range_valid(src_ptr);
			__range_valid(len >= 0);
			__range_valid((len & 1) == 0);
			if(dst_ptr > src_ptr)
			{
				// Backward copy: start from the last 16-bit word of each buffer.
				short * d = (short *)(((char *)dst_ptr) + len - 2);
				const short * s = (const short *)(((const char *)src_ptr) + len - 2);
				for(int todo = len;todo > 0;todo -= 2)*d-- = *s--;
			} else {
				// Forward copy is safe when dst <= src (also when equal).
				short * d = (short *)dst_ptr;
				const short * s = (const short *)src_ptr;
				for(int todo = len;todo > 0;todo -= 2)*d++ = *s++;
			}
			return dst_ptr;
		}
		
		// Fast forward-only byte copy (portable C fallback).
		// Copies len bytes from src_ptr to dst_ptr. The regions must NOT
		// overlap with dst above src: unlike kvi_memmove() this always
		// copies front-to-back.
		void kvi_fastmove(void *dst_ptr,const void *src_ptr,int len)
		{
			__range_valid(dst_ptr);
			__range_valid(src_ptr);
			__range_valid(len >= 0);
			char * d = (char *)dst_ptr;
			const char * s = (const char *)src_ptr;
			for(int i = 0;i < len;i++)d[i] = s[i];
		}
	
		// Fast forward-only copy for even byte counts (portable C fallback).
		// Copies len bytes (len MUST be a multiple of 2) from src_ptr to
		// dst_ptr, two bytes at a time, always front-to-back: the regions
		// must not overlap with dst above src.
		void kvi_fastmoveodd(void *dst_ptr,const void *src_ptr,int len)
		{
			__range_valid(dst_ptr);
			__range_valid(src_ptr);
			__range_valid(len >= 0);
			__range_valid((len & 1) == 0);
			short * d = (short *)dst_ptr;
			const short * s = (const short *)src_ptr;
			for(int todo = len;todo > 0;todo -= 2)*d++ = *s++;
		}

	#endif // !COMPILE_ix86_ASM
	
	// Fill len bytes starting at dst_ptr with the byte value c
	// (portable replacement for memset(), used when the system
	// memmove/memset family is not selected at configure time).
	void kvi_memset(void *dst_ptr,char c,int len)
	{
		__range_valid(dst_ptr);
		__range_valid(len >= 0);
		char * end = ((char *)dst_ptr) + len;
		for(char * p = (char *)dst_ptr;p != end;++p)*p = c;
	}

#endif // !COMPILE_WITH_SYSTEM_MEMMOVE