summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/python/python3/create_manifest3.py
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-devtools/python/python3/create_manifest3.py')
-rw-r--r--meta/recipes-devtools/python/python3/create_manifest3.py321
1 files changed, 321 insertions, 0 deletions
diff --git a/meta/recipes-devtools/python/python3/create_manifest3.py b/meta/recipes-devtools/python/python3/create_manifest3.py
new file mode 100644
index 0000000000..ead27e9fcc
--- /dev/null
+++ b/meta/recipes-devtools/python/python3/create_manifest3.py
@@ -0,0 +1,321 @@
1# This script is used as a bitbake task to create a new python manifest
2# $ bitbake python -c create_manifest
3#
4# Our goal is to keep python-core as small as possible and add other python
5# packages only when the user needs them, hence why we split upstream python
6# into several packages.
7#
8# In a very simplistic way what this does is:
9# Launch python and see specifically what is required for it to run at a minimum
10#
11# Go through the python-manifest file and launch a separate task for every single
12# one of the files on each package, this task will check what was required for that
13# specific module to run, these modules will be called dependencies.
14# The output of such task will be a list of the modules or dependencies that were
15# found for that file.
16#
17# Such output will be parsed by this script, we will look for each dependency on the
18# manifest and if we find that another package already includes it, then we will add
19# that package as an RDEPENDS to the package we are currently checking; in case we don't
20# find the current dependency on any other package we will add it to the current package
21# as part of FILES.
22#
23#
24# This way we will create a new manifest from the data structure that was built during
25# this process, on this new manifest each package will contain specifically only
26# what it needs to run.
27#
28# There are some caveats which we try to deal with, such as repeated files on different
29# packages, packages that include folders, wildcards, and special packages.
30# It's also important to note that this method only works for python files, and shared
31# libraries. Static libraries, header files and binaries need to be dealt with manually.
32#
33# This script differs from its python2 version mostly on how shared libraries are handled
34# The manifest file for python3 has an extra field which contains the cached files for
35# each package.
36# The method to handle cached files does not work when a module includes a folder which
37# itself contains the pycache folder, fortunately this is almost never the case.
38#
39# Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29@gmail.com>
40
41
42import sys
43import subprocess
44import json
45import os
46
# HACK: derive the native sysroot location from the interpreter search path.
# Every sys.path entry containing the pivot is truncated right after the pivot;
# the last matching entry wins.
# NOTE(review): if no entry contains the pivot, nativelibfolder is never
# assigned and isFolder() will fail with a NameError -- confirm the script is
# only ever launched from inside a recipe sysroot.
pivot = 'recipe-sysroot-native'
for p in sys.path:
    if pivot not in p:
        continue
    sysroot_prefix, matched, _ = p.partition(pivot)
    nativelibfolder = sysroot_prefix + matched
52
# Manifest that is built up package by package and written out at the end
new_manifest = dict()

# Bookkeeping used for the final report: every file placed so far, files that
# were placed more than once, and entries ending in a wildcard
allfiles, repeated, wildcards = [], [], []

# Packages that list at least one folder, and the folders themselves
hasfolders, allfolders = [], []
63
def isFolder(value):
    """Tell whether a manifest entry points at a directory in the native sysroot.

    '${libdir}' in *value* is substituted with <nativelibfolder>/usr/lib,
    then /usr/lib64, then /usr/lib32; the first substitution that names an
    existing directory makes the function return True.  Returns False when
    none of them do.
    """
    for libdir_name in ('lib', 'lib64', 'lib32'):
        candidate = value.replace('${libdir}', nativelibfolder + '/usr/' + libdir_name)
        if os.path.isdir(candidate):
            return True
    return False
69
def isCached(item):
    """Return True when *item* contains the substring '__pycache__'."""
    return '__pycache__' in item
75
# Load the manifest that is currently checked in; it is the starting point
# for everything computed below
with open('python3-manifest.json') as manifest_file:
    old_manifest = json.load(manifest_file)
79
80
# First pass: work out what the core package needs, because everything else
# is based on the assumption that core is actually working.


def _merge_core_deps(found):
    """Add each path in *found* to the core package unless already listed.

    Paths containing '__pycache__' go to core's 'cached' list, all others to
    its 'files' list.  The old manifest is updated in place because it is the
    structure later used to look up dependencies for other packages.
    """
    core_pkg = old_manifest['core']
    for dep in found:
        bucket = core_pkg['cached'] if isCached(dep) else core_pkg['files']
        if dep not in bucket:
            bucket.append(dep)


print('Getting dependencies for core package:')

# Special call to check what the interpreter itself needs
core_output = subprocess.check_output(
    [sys.executable, 'get_module_deps3.py', 'python-core-package']).decode('utf8')
_merge_core_deps(core_output.split())

# The list is extended while it is being walked, so dependencies appended by
# _merge_core_deps() are themselves visited later in this same loop.
for entry in old_manifest['core']['files']:
    # Folders are not imported (difficult to handle multilib); pass them directly
    if isFolder(entry):
        _merge_core_deps([entry])
        continue

    # Binaries and headers are not imported either, assume they were added
    # correctly (manually)
    if '${bindir}' in entry or '${includedir}' in entry:
        if entry not in old_manifest['core']['files']:
            old_manifest['core']['files'].append(entry)
        continue

    # Ignore empty values
    if entry == '':
        continue

    # Module name, should not be affected by libdir/bindir
    module_name = os.path.splitext(os.path.basename(os.path.normpath(entry)))[0]

    # Launch a separate process for each module for deterministic behavior,
    # so each module only imports what is necessary for it to work
    print('Getting dependencies for module: %s' % module_name)
    module_output = subprocess.check_output(
        [sys.executable, 'get_module_deps3.py', '%s' % module_name]).decode('utf8')
    print(module_output)
    _merge_core_deps(module_output.split())
145
146
# Record which packages list folders, and which folders are listed at all;
# both lists are consulted when resolving dependencies further down
for pkg_name, pkg_data in old_manifest.items():
    for entry in pkg_data['files']:
        if not isFolder(entry):
            continue
        print('%s is a folder' % entry)
        if pkg_name not in hasfolders:
            hasfolders.append(pkg_name)
        if entry not in allfolders:
            allfolders.append(entry)
157
# Second pass: build the new manifest entry for every package.
# For each file of a package the module name is derived, get_module_deps3.py
# is run on it, and every reported dependency is either turned into an
# RDEPENDS on the package that already ships it, or added to this package's
# own FILES / cached lists.
for key in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[key] = {}
    new_manifest[key]['files'] = []

    new_manifest[key]['rdepends'] = []
    # All packages should depend on core
    if key != 'core':
        new_manifest[key]['rdepends'].append('core')
        new_manifest[key]['cached'] = []
    else:
        # Core was completed during the first pass, reuse its cached list
        new_manifest[key]['cached'] = old_manifest[key]['cached']
    new_manifest[key]['summary'] = old_manifest[key]['summary']

    # Handle special cases, we assume that when they were manually added
    # to the manifest we knew what we were doing.
    print('Handling package %s' % key)
    special_packages = ['misc', 'modules', 'dev']
    if key in special_packages or 'staticdev' in key:
        print('Passing %s package directly' % key)
        new_manifest[key] = old_manifest[key]
        continue

    for value in old_manifest[key]['files']:
        # We already handled core on the first pass
        if key == 'core':
            new_manifest[key]['files'].append(value)
            continue
        # Folders are not imported, difficult to handle multilib
        # NOTE(review): unlike the core pass there is no `continue` here, so a
        # folder entry also falls through to the dependency scan below --
        # confirm this is intended.
        if isFolder(value):
            # Pass folders directly
            new_manifest[key]['files'].append(value)
        # Ignore binaries, since we don't import those
        if '${bindir}' in value:
            # Pass it directly to the new manifest data structure
            if value not in new_manifest[key]['files']:
                new_manifest[key]['files'].append(value)
            continue
        # Ignore empty values
        if value == '':
            continue
        if '${includedir}' in value:
            if value not in new_manifest[key]['files']:
                new_manifest[key]['files'].append(value)
            continue
        # Get module name, should not be affected by libdir/bindir
        value = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

        # Launch a separate process for each module for deterministic behavior,
        # so each module only imports what is necessary for it to work
        print('Getting dependencies for module: %s' % value)
        output = subprocess.check_output(
            [sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
        # We can print dependencies for debugging purposes
        print(output)
        # Output will have all dependencies
        for item in output.split():

            # A dependency may live inside a folder that another package ships
            # (e.g. folder encodings contained in codecs).  In that case the
            # package owning the folder becomes an rdependency.  The folder is
            # matched as a substring of the dependency path so that nested
            # folders (path/folder1/folder2/filename) are caught as well.
            inFolders = False
            for folder in allfolders:
                if folder in item:
                    inFolders = True     # Did we find a folder?
                    folderFound = False  # Second flag to stop scanning packages
                    # Loop only through packages which contain folders
                    for keyfolder in hasfolders:
                        if not folderFound:
                            # Look at both the files and the cached entries of
                            # the candidate package.  (The previous
                            # `files or file_folder in cached` expression only
                            # ever iterated `files`, and raised if it was empty.)
                            candidates = (old_manifest[keyfolder]['files']
                                          + old_manifest[keyfolder]['cached'])
                            for file_folder in candidates:
                                if file_folder == folder:
                                    print('%s found in %s' % (folder, keyfolder))
                                    folderFound = True
                                    if keyfolder not in new_manifest[key]['rdepends'] and keyfolder != key:
                                        new_manifest[key]['rdepends'].append(keyfolder)
                        else:
                            break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # We might already have it on the dictionary since it could depend on a (previously checked) module
            if item not in new_manifest[key]['files'] and item not in new_manifest[key]['cached']:
                # Handle core as a special package, we already did it so we pass it to NEW data structure directly
                # (core entries hit the `continue` at the top of the file loop,
                # so this branch is not reached for them as the loop stands)
                if key == 'core':
                    print('Adding %s to %s FILES' % (item, key))
                    if item.endswith('*'):
                        wildcards.append(item)
                    if isCached(item):
                        new_manifest[key]['cached'].append(item)
                    else:
                        new_manifest[key]['files'].append(item)

                    # Check for repeated files
                    if item not in allfiles:
                        allfiles.append(item)
                    else:
                        repeated.append(item)

                else:

                    # Check if this dependency is already contained on another package, so we add it
                    # as an RDEPENDS, or if it is not, it means it should be contained on the current
                    # package, so we should add it to FILES
                    for newkey in old_manifest:
                        if item in old_manifest[newkey]['files'] or item in old_manifest[newkey]['cached']:
                            # Since we are nesting, we need to check it is not the same key
                            if newkey != key:
                                if newkey not in new_manifest[key]['rdepends']:
                                    # Add it to the new manifest data struct
                                    print('Adding %s to %s RDEPENDS, because it contains %s' % (newkey, key, item))
                                    new_manifest[key]['rdepends'].append(newkey)
                                break
                    else:
                        # Reached only when the loop above never hit `break`,
                        # i.e. no *other* package ships this dependency
                        print('Adding %s to %s FILES' % (item, key))
                        # Since it wasn't found on another package, it is not an RDEP, so add it to FILES for this package
                        if isCached(item):
                            new_manifest[key]['cached'].append(item)
                        else:
                            new_manifest[key]['files'].append(item)
                        if item.endswith('*'):
                            wildcards.append(item)
                        if item not in allfiles:
                            allfiles.append(item)
                        else:
                            repeated.append(item)
305
# Summary for the person running the task: things that need a manual look.
# Each heading is printed on its own line followed by the matching list.
for heading, entries in (
    ('The following files are repeated (contained in more than one package), please check which package should get it:', repeated),
    ('The following files contain wildcards, please check they are necessary', wildcards),
    ('The following files contain folders, please check they are necessary', hasfolders),
):
    print(heading)
    print(entries)
312
# Sort every list of every package, purely so the result looks nicer
for pkg_entry in new_manifest.values():
    for field in ('files', 'cached', 'rdepends'):
        pkg_entry[field].sort()

# Write the manifest that was built next to the original, with a .new suffix
with open('python3-manifest.json.new', 'w') as new_manifest_file:
    json.dump(new_manifest, new_manifest_file, sort_keys=True, indent=4)