diff options
Diffstat (limited to 'meta/recipes-devtools/python/python3/create_manifest3.py')
| -rw-r--r-- | meta/recipes-devtools/python/python3/create_manifest3.py | 321 |
1 files changed, 321 insertions, 0 deletions
diff --git a/meta/recipes-devtools/python/python3/create_manifest3.py b/meta/recipes-devtools/python/python3/create_manifest3.py new file mode 100644 index 0000000000..ead27e9fcc --- /dev/null +++ b/meta/recipes-devtools/python/python3/create_manifest3.py | |||
| @@ -0,0 +1,321 @@ | |||
| 1 | # This script is used as a bitbake task to create a new python manifest | ||
| 2 | # $ bitbake python -c create_manifest | ||
| 3 | # | ||
| 4 | # Our goal is to keep python-core as small as possible and add other python | ||
| 5 | # packages only when the user needs them, hence why we split upstream python | ||
| 6 | # into several packages. | ||
| 7 | # | ||
| 8 | # In a very simplistic way what this does is: | ||
| 9 | # Launch python and see specifically what is required for it to run at a minimum | ||
| 10 | # | ||
| 11 | # Go through the python-manifest file and launch a separate task for every single | ||
| 12 | # one of the files on each package, this task will check what was required for that | ||
| 13 | # specific module to run, these modules will be called dependencies. | ||
| 14 | # The output of such task will be a list of the modules or dependencies that were | ||
| 15 | # found for that file. | ||
| 16 | # | ||
| 17 | # Such output will be parsed by this script, we will look for each dependency on the | ||
| 18 | # manifest and if we find that another package already includes it, then we will add | ||
| 19 | # that package as an RDEPENDS to the package we are currently checking; in case we don't | ||
| 20 | # find the current dependency on any other package we will add it to the current package | ||
| 21 | # as part of FILES. | ||
| 22 | # | ||
| 23 | # | ||
| 24 | # This way we will create a new manifest from the data structure that was built during | ||
| 25 | # this process; in this new manifest each package will contain specifically only | ||
| 26 | # what it needs to run. | ||
| 27 | # | ||
| 28 | # There are some caveats which we try to deal with, such as repeated files on different | ||
| 29 | # packages, packages that include folders, wildcards, and special packages. | ||
| 30 | # It's also important to note that this method only works for python files, and shared | ||
| 31 | # libraries. Static libraries, header files and binaries need to be dealt with manually. | ||
| 32 | # | ||
| 33 | # This script differs from its python2 version mostly on how shared libraries are handled | ||
| 34 | # The manifest file for python3 has an extra field which contains the cached files for | ||
| 35 | # each package. | ||
| 36 | # The method to handle cached files does not work when a module includes a folder which | ||
| 37 | # itself contains the pycache folder; fortunately this is almost never the case. | ||
| 38 | # | ||
| 39 | # Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29@gmail.com> | ||
| 40 | |||
| 41 | |||
| 42 | import sys | ||
| 43 | import subprocess | ||
| 44 | import json | ||
| 45 | import os | ||
| 46 | |||
# HACK: derive the native sysroot prefix from the interpreter's search path so
# that folder entries in the manifest can be checked on disk (see isFolder).
# Every sys.path entry containing the pivot is cut right after the pivot; the
# last matching entry wins.
# NOTE: if no sys.path entry contains the pivot, nativelibfolder is never
# assigned and the first use of it raises NameError.
pivot = 'recipe-sysroot-native'
for p in sys.path:
    if pivot in p:
        nativelibfolder = p[:p.find(pivot) + len(pivot)]

# Empty dict to hold the whole manifest
new_manifest = {}

# Check for repeated files, folders and wildcards
allfiles = []
repeated = []
wildcards = []

# Packages that list at least one folder, and the folder entries themselves
hasfolders = []
allfolders = []
| 63 | |||
def isFolder(value):
    """Return True if the manifest entry *value* is an existing directory.

    The ``${libdir}`` placeholder in *value* is replaced, in turn, with the
    ``/usr/lib``, ``/usr/lib64`` and ``/usr/lib32`` directories below the
    module-level ``nativelibfolder`` prefix; the entry counts as a folder as
    soon as one of the resulting paths is a directory.  An entry without the
    placeholder is checked as given.
    """
    return any(
        os.path.isdir(value.replace('${libdir}', nativelibfolder + libdir))
        for libdir in ('/usr/lib', '/usr/lib64', '/usr/lib32')
    )
| 69 | |||
def isCached(item):
    """Return True if *item* contains the substring ``__pycache__``."""
    return '__pycache__' in item
| 75 | |||
# Read existing JSON manifest
with open('python3-manifest.json') as manifest:
    old_manifest = json.load(manifest)


def _add_to_core(entry):
    """Record *entry* under core's 'cached' or 'files' list unless already there.

    The old manifest is updated in place because it is the data structure
    later used to look up which package provides a dependency.
    """
    section = 'cached' if isCached(entry) else 'files'
    if entry not in old_manifest['core'][section]:
        old_manifest['core'][section].append(entry)


# First pass to get core-package functionality, because we base everything on
# the fact that core is actually working.
print ('Getting dependencies for core package:')

# Special call to check for core package
output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package']).decode('utf8')
for item in output.split():
    # We append it so it doesn't hurt what we currently have
    _add_to_core(item)

# NOTE: entries appended to core's 'files' inside this loop are visited by the
# same loop later on, so dependencies of dependencies are analysed as well.
for value in old_manifest['core']['files']:
    # Ignore folders, since we don't import those, difficult to handle multilib
    if isFolder(value):
        # Pass it directly; only a cached folder can add a new entry here,
        # since value is already part of core's 'files'.
        _add_to_core(value)
        continue
    # Ignore binaries, since we don't import those, assume they were added
    # correctly (manually). They are already in 'files', nothing to add.
    if '${bindir}' in value:
        continue
    # Ignore empty values
    if value == '':
        continue
    # Header entries are kept as they are as well
    if '${includedir}' in value:
        continue
    # Get module name, shouldn't be affected by libdir/bindir
    value = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

    # Launch separate task for each module for deterministic behavior
    # Each module will only import what is necessary for it to work in specific
    print ('Getting dependencies for module: %s' % value)
    output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
    print (output)
    for item in output.split():
        # We append it so it doesn't hurt what we currently have
        _add_to_core(item)
| 145 | |||
| 146 | |||
# We check which packages include folders: hasfolders collects the package
# names, allfolders the folder entries themselves (both without duplicates).
for key in old_manifest:
    for value in old_manifest[key]['files']:
        # Ignore folders, since we don't import those, difficult to handle multilib
        if isFolder(value):
            print ('%s is a folder' % value)
            if key not in hasfolders:
                hasfolders.append(key)
            if value not in allfolders:
                allfolders.append(value)
| 157 | |||
# Build the new manifest: for every package, run the dependency finder on each
# of its modules and sort every reported dependency either into RDEPENDS (some
# other package already ships it) or into this package's own FILES/cached.
for key in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[key] = {}
    new_manifest[key]['files'] = []

    new_manifest[key]['rdepends'] = []
    # All packages should depend on core
    if key != 'core':
        new_manifest[key]['rdepends'].append('core')
        new_manifest[key]['cached'] = []
    else:
        # core was completed on the first pass, reuse its cached list
        new_manifest[key]['cached'] = old_manifest[key]['cached']
    new_manifest[key]['summary'] = old_manifest[key]['summary']

    # Handle special cases, we assume that when they were manually added
    # to the manifest we knew what we were doing.
    print ('Handling package %s' % key)
    special_packages = ['misc', 'modules', 'dev']
    if key in special_packages or 'staticdev' in key:
        print('Passing %s package directly' % key)
        new_manifest[key] = old_manifest[key]
        continue

    for value in old_manifest[key]['files']:
        # We already handled core on the first pass
        if key == 'core':
            new_manifest[key]['files'].append(value)
            continue
        # Folders are passed directly to the new manifest.
        # NOTE(review): unlike the core pass there is no `continue` here, so
        # the folder's basename is still analysed as a module further down;
        # confirm this is intended.
        if isFolder(value):
            new_manifest[key]['files'].append(value)
        # Ignore binaries, since we don't import those
        if '${bindir}' in value:
            # Pass it directly to the new manifest data structure
            if value not in new_manifest[key]['files']:
                new_manifest[key]['files'].append(value)
            continue
        # Ignore empty values
        if value == '':
            continue
        if '${includedir}' in value:
            if value not in new_manifest[key]['files']:
                new_manifest[key]['files'].append(value)
            continue
        # Get module name, shouldn't be affected by libdir/bindir
        value = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

        # Launch separate task for each module for deterministic behavior
        # Each module will only import what is necessary for it to work in specific
        print ('Getting dependencies for module: %s' % value)
        output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
        # We can print dependencies for debugging purposes
        print (output)
        # Output will have all dependencies
        for item in output.split():

            # A dependency could live inside a folder that is shipped by
            # another package (e.g. folder encodings contained in codecs).
            # In that case the package owning the folder becomes an rdependency.
            # We check whether a known folder string is contained in the
            # dependency path, which also works for nested folders.
            inFolders = False
            for folder in allfolders:
                if folder in item:
                    inFolders = True  # Did we find a folder?
                    # Loop only through packages which contain folders and
                    # stop at the first one that lists this folder, looking
                    # at both its files and its cached entries.
                    for keyfolder in hasfolders:
                        if (folder in old_manifest[keyfolder]['files']
                                or folder in old_manifest[keyfolder]['cached']):
                            print ('%s found in %s' % (folder, keyfolder))
                            if keyfolder not in new_manifest[key]['rdepends'] and keyfolder != key:
                                new_manifest[key]['rdepends'].append(keyfolder)
                            break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # We might already have it on the dictionary since it could depend on a (previously checked) module
            if item not in new_manifest[key]['files'] and item not in new_manifest[key]['cached']:
                # Handle core as a special package, we already did it so we pass it to NEW data structure directly
                if key == 'core':
                    print('Adding %s to %s FILES' % (item, key))
                    if item.endswith('*'):
                        wildcards.append(item)
                    if isCached(item):
                        new_manifest[key]['cached'].append(item)
                    else:
                        new_manifest[key]['files'].append(item)

                    # Check for repeated files
                    if item not in allfiles:
                        allfiles.append(item)
                    else:
                        repeated.append(item)

                else:

                    # Check if this dependency is already contained on another package, so we add it
                    # as an RDEPENDS, or if its not, it means it should be contained on the current
                    # package, so we should add it to FILES
                    for newkey in old_manifest:
                        if item in old_manifest[newkey]['files'] or item in old_manifest[newkey]['cached']:
                            # Since we're nesting, we need to check it's not the same key
                            if newkey != key:
                                if newkey not in new_manifest[key]['rdepends']:
                                    # Add it to the new manifest data struct
                                    print('Adding %s to %s RDEPENDS, because it contains %s' % (newkey, key, item))
                                    new_manifest[key]['rdepends'].append(newkey)
                                break
                    else:
                        # The loop ended without break: no other package ships
                        # this item, so it's not an RDEP, add it to this package
                        print('Adding %s to %s FILES' % (item, key))
                        if isCached(item):
                            new_manifest[key]['cached'].append(item)
                        else:
                            new_manifest[key]['files'].append(item)
                        if item.endswith('*'):
                            wildcards.append(item)
                        if item not in allfiles:
                            allfiles.append(item)
                        else:
                            repeated.append(item)
| 305 | |||
# Report everything that needs a manual look
print ('The following files are repeated (contained in more than one package), please check which package should get it:')
print (repeated)
print('The following files contain wildcards, please check they are necessary')
print(wildcards)
# hasfolders holds package names, not file names
print('The following packages contain folders, please check they are necessary')
print(hasfolders)

# Sort it just so it looks nicer
for key in new_manifest:
    new_manifest[key]['files'].sort()
    new_manifest[key]['cached'].sort()
    new_manifest[key]['rdepends'].sort()

# Create the manifest from the data structure that was built
with open('python3-manifest.json.new', 'w') as outfile:
    json.dump(new_manifest, outfile, sort_keys=True, indent=4)
